[RFC PATCH] drm/ttm, drm/vmwgfx: Have TTM support AMD SEV encryption

2019-05-24 Thread VMware
From: Thomas Hellstrom 

With SEV encryption, all DMA memory must be marked decrypted
(AKA "shared") for devices to be able to read it. In the future we might
want to be able to switch normal (encrypted) memory to decrypted in exactly
the same way as we handle caching states, and that would require additional
memory pools. But for now, rely on memory allocated with
dma_alloc_coherent(), which is already decrypted with SEV enabled. Set up
the page protection accordingly. Drivers must detect that SEV is enabled
and switch to the dma page pool.

This patch has not yet been tested. As a follow-up, we might want to
cache decrypted pages in the dma page pool regardless of their caching
state.
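
As a rough sketch of the driver-side detection this implies (illustrative
only, not part of this patch; it assumes the sev_active() helper from
<linux/mem_encrypt.h> and a driver-level pool choice):

#include <linux/mem_encrypt.h>

/* Hypothetical helper: with SEV active, route TTM allocations through
 * the dma page pool, since dma_alloc_coherent() memory is already
 * decrypted there. */
static bool my_driver_use_dma_pool(void)
{
        return IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT) && sev_active();
}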

Cc: Christian König 
Signed-off-by: Thomas Hellstrom 
---
 drivers/gpu/drm/ttm/ttm_bo_util.c| 17 +
 drivers/gpu/drm/ttm/ttm_bo_vm.c  |  6 --
 drivers/gpu/drm/ttm/ttm_page_alloc_dma.c |  3 +++
 drivers/gpu/drm/vmwgfx/vmwgfx_blit.c |  6 --
 include/drm/ttm/ttm_bo_driver.h  |  8 +---
 include/drm/ttm/ttm_tt.h |  1 +
 6 files changed, 30 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
index 895d77d799e4..1d6643bd0b01 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -419,11 +419,13 @@ int ttm_bo_move_memcpy(struct ttm_buffer_object *bo,
page = i * dir + add;
if (old_iomap == NULL) {
pgprot_t prot = ttm_io_prot(old_mem->placement,
+   ttm->page_flags,
PAGE_KERNEL);
ret = ttm_copy_ttm_io_page(ttm, new_iomap, page,
   prot);
} else if (new_iomap == NULL) {
pgprot_t prot = ttm_io_prot(new_mem->placement,
+   ttm->page_flags,
PAGE_KERNEL);
ret = ttm_copy_io_ttm_page(ttm, old_iomap, page,
   prot);
@@ -526,11 +528,11 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo,
return 0;
 }
 
-pgprot_t ttm_io_prot(uint32_t caching_flags, pgprot_t tmp)
+pgprot_t ttm_io_prot(u32 caching_flags, u32 tt_page_flags, pgprot_t tmp)
 {
/* Cached mappings need no adjustment */
if (caching_flags & TTM_PL_FLAG_CACHED)
-   return tmp;
+   goto check_encryption;
 
 #if defined(__i386__) || defined(__x86_64__)
if (caching_flags & TTM_PL_FLAG_WC)
@@ -548,6 +550,11 @@ pgprot_t ttm_io_prot(uint32_t caching_flags, pgprot_t tmp)
 #if defined(__sparc__) || defined(__mips__)
tmp = pgprot_noncached(tmp);
 #endif
+
+check_encryption:
+   if (tt_page_flags & TTM_PAGE_FLAG_DECRYPTED)
+   tmp = pgprot_decrypted(tmp);
+
return tmp;
 }
 EXPORT_SYMBOL(ttm_io_prot);
@@ -594,7 +601,8 @@ static int ttm_bo_kmap_ttm(struct ttm_buffer_object *bo,
if (ret)
return ret;
 
-   if (num_pages == 1 && (mem->placement & TTM_PL_FLAG_CACHED)) {
+   if (num_pages == 1 && (mem->placement & TTM_PL_FLAG_CACHED) &&
+   !(ttm->page_flags & TTM_PAGE_FLAG_DECRYPTED)) {
/*
 * We're mapping a single page, and the desired
 * page protection is consistent with the bo.
@@ -608,7 +616,8 @@ static int ttm_bo_kmap_ttm(struct ttm_buffer_object *bo,
 * We need to use vmap to get the desired page protection
 * or to make the buffer object look contiguous.
 */
-   prot = ttm_io_prot(mem->placement, PAGE_KERNEL);
+   prot = ttm_io_prot(mem->placement, ttm->page_flags,
+  PAGE_KERNEL);
map->bo_kmap_type = ttm_bo_map_vmap;
map->virtual = vmap(ttm->pages + start_page, num_pages,
0, prot);
diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
index 2d9862fcf6fd..e12247edd243 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -245,7 +245,6 @@ vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf,
goto out_io_unlock;
}
 
-   cvma.vm_page_prot = ttm_io_prot(bo->mem.placement, prot);
if (!bo->mem.bus.is_iomem) {
struct ttm_operation_ctx ctx = {
.interruptible = false,
@@ -255,13 +254,16 @@ vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf,
};
 
ttm = bo->ttm;
+   cvma.vm_page_prot = ttm_io_prot(bo->mem.placement,
+   ttm->page_flags, prot);
if (ttm_tt_populate(bo->ttm, &ctx)) {
ret = VM_

[PATCH v4 6/9] drm/vmwgfx: Implement an infrastructure for write-coherent resources

2019-06-11 Thread VMware
 Emulate coherency by tracking vm accesses.
  * @backup: The backup buffer if any. Protected by resource reserved.
  * @backup_offset: Offset into the backup buffer if any. Protected by resource
  * reserved. Note that only a few resource types can have a @backup_offset
@@ -151,14 +154,16 @@ struct vmw_res_func;
  * @hw_destroy: Callback to destroy the resource on the device, as part of
  * resource destruction.
  */
+struct vmw_resource_dirty;
 struct vmw_resource {
struct kref kref;
struct vmw_private *dev_priv;
int id;
u32 used_prio;
unsigned long backup_size;
-   bool res_dirty;
-   bool backup_dirty;
+   u32 res_dirty : 1;
+   u32 backup_dirty : 1;
+   u32 coherent : 1;
struct vmw_buffer_object *backup;
unsigned long backup_offset;
unsigned long pin_count;
@@ -166,6 +171,7 @@ struct vmw_resource {
struct list_head lru_head;
struct list_head mob_head;
struct list_head binding_head;
+   struct vmw_resource_dirty *dirty;
void (*res_free) (struct vmw_resource *res);
void (*hw_destroy) (struct vmw_resource *res);
 };
@@ -606,6 +612,9 @@ struct vmw_private {
 
/* Validation memory reservation */
struct vmw_validation_mem vvm;
+
+   /* VM operations */
+   struct vm_operations_struct vm_ops;
 };
 
 static inline struct vmw_surface *vmw_res_to_srf(struct vmw_resource *res)
@@ -722,6 +731,8 @@ extern void vmw_resource_evict_all(struct vmw_private *dev_priv);
 extern void vmw_resource_unbind_list(struct vmw_buffer_object *vbo);
 void vmw_resource_mob_attach(struct vmw_resource *res);
 void vmw_resource_mob_detach(struct vmw_resource *res);
+void vmw_resource_dirty_update(struct vmw_resource *res, pgoff_t start,
+  pgoff_t end);
 
 /**
  * vmw_resource_mob_attached - Whether a resource currently has a mob attached
@@ -1410,6 +1421,15 @@ int vmw_host_log(const char *log);
 #define VMW_DEBUG_USER(fmt, ...)  \
DRM_DEBUG_DRIVER(fmt, ##__VA_ARGS__)
 
+/* Resource dirtying - vmwgfx_page_dirty.c */
+void vmw_bo_dirty_scan(struct vmw_buffer_object *vbo);
+int vmw_bo_dirty_add(struct vmw_buffer_object *vbo);
+void vmw_bo_dirty_transfer_to_res(struct vmw_resource *res);
+void vmw_bo_dirty_clear_res(struct vmw_resource *res);
+void vmw_bo_dirty_release(struct vmw_buffer_object *vbo);
+vm_fault_t vmw_bo_vm_fault(struct vm_fault *vmf);
+vm_fault_t vmw_bo_vm_mkwrite(struct vm_fault *vmf);
+
 /**
  * Inline helper functions
  */
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
index 33533d126277..319c1ca35663 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
@@ -2560,7 +2560,6 @@ static int vmw_cmd_dx_check_subresource(struct vmw_private *dev_priv,
 offsetof(typeof(*cmd), sid));
 
cmd = container_of(header, typeof(*cmd), header);
-
return vmw_cmd_res_check(dev_priv, sw_context, vmw_res_surface,
 VMW_RES_DIRTY_NONE, user_surface_converter,
 &cmd->sid, NULL);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c b/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c
new file mode 100644
index ..8d154f90bdc0
--- /dev/null
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c
@@ -0,0 +1,409 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/******
+ *
+ * Copyright 2019 VMware, Inc., Palo Alto, CA., USA
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **/
+#include "vmwgfx_drv.h"
+
+/*
+ * Different methods for tracking dirty:
+ * VMW_BO_DIRTY_PAG

[PATCH v4 9/9] drm/vmwgfx: Add surface dirty-tracking callbacks

2019-06-11 Thread VMware
From: Thomas Hellstrom 

Add the callbacks necessary to implement emulated coherent memory for
surfaces. Add a flag to the gb_surface_create ioctl to indicate that
surface memory should be coherent.
Also bump the drm minor version to signal the availability of coherent
surfaces.
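
From user-space, requesting a coherent surface then becomes a matter of
setting the new flag in the gb_surface_create_ext argument. A minimal
sketch, assuming the flag is exposed as drm_vmw_surface_flag_coherent in
include/uapi/drm/vmwgfx_drm.h:

        union drm_vmw_gb_surface_create_ext_arg arg = { 0 };

        /* Hypothetical usage; fill in format, size, mip levels as usual. */
        arg.req.base.drm_surface_flags |= drm_vmw_surface_flag_coherent;
        ret = drmCommandWriteRead(fd, DRM_VMW_GB_SURFACE_CREATE_EXT,
                                  &arg, sizeof(arg));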

Signed-off-by: Thomas Hellstrom 
Reviewed-by: Deepak Rawat 
---
 .../device_include/svga3d_surfacedefs.h   | 233 ++-
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.h   |   4 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_surface.c   | 395 +-
 include/uapi/drm/vmwgfx_drm.h |   4 +-
 4 files changed, 629 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/vmwgfx/device_include/svga3d_surfacedefs.h b/drivers/gpu/drm/vmwgfx/device_include/svga3d_surfacedefs.h
index f2bfd3d80598..61414f105c67 100644
--- a/drivers/gpu/drm/vmwgfx/device_include/svga3d_surfacedefs.h
+++ b/drivers/gpu/drm/vmwgfx/device_include/svga3d_surfacedefs.h
@@ -1280,7 +1280,6 @@ svga3dsurface_get_pixel_offset(SVGA3dSurfaceFormat format,
return offset;
 }
 
-
 static inline u32
 svga3dsurface_get_image_offset(SVGA3dSurfaceFormat format,
   surf_size_struct baseLevelSize,
@@ -1375,4 +1374,236 @@ svga3dsurface_is_screen_target_format(SVGA3dSurfaceFormat format)
return svga3dsurface_is_dx_screen_target_format(format);
 }
 
+/**
+ * struct svga3dsurface_mip - Mipmap level information
+ * @bytes: Bytes required in the backing store of this mipmap level.
+ * @img_stride: Byte stride per image.
+ * @row_stride: Byte stride per block row.
+ * @size: The size of the mipmap.
+ */
+struct svga3dsurface_mip {
+   size_t bytes;
+   size_t img_stride;
+   size_t row_stride;
+   struct drm_vmw_size size;
+
+};
+
+/**
+ * struct svga3dsurface_cache - Cached surface information
+ * @desc: Pointer to the surface descriptor
+ * @mip: Array of mipmap level information. Valid size is @num_mip_levels.
+ * @mip_chain_bytes: Bytes required in the backing store for the whole chain
+ * of mip levels.
+ * @sheet_bytes: Bytes required in the backing store for a sheet
+ * representing a single sample.
+ * @num_mip_levels: Valid size of the @mip array. Number of mipmap levels in
+ * a chain.
+ * @num_layers: Number of slices in an array texture or number of faces in
+ * a cubemap texture.
+ */
+struct svga3dsurface_cache {
+   const struct svga3d_surface_desc *desc;
+   struct svga3dsurface_mip mip[DRM_VMW_MAX_MIP_LEVELS];
+   size_t mip_chain_bytes;
+   size_t sheet_bytes;
+   u32 num_mip_levels;
+   u32 num_layers;
+};
+
+/**
+ * struct svga3dsurface_loc - Surface location
+ * @sub_resource: Surface subresource. Defined as layer * num_mip_levels +
+ * mip_level.
+ * @x: X coordinate.
+ * @y: Y coordinate.
+ * @z: Z coordinate.
+ */
+struct svga3dsurface_loc {
+   u32 sub_resource;
+   u32 x, y, z;
+};
+
+/**
+ * svga3dsurface_subres - Compute the subresource from layer and mipmap.
+ * @cache: Surface layout data.
+ * @mip_level: The mipmap level.
+ * @layer: The surface layer (face or array slice).
+ *
+ * Return: The subresource.
+ */
+static inline u32 svga3dsurface_subres(const struct svga3dsurface_cache *cache,
+  u32 mip_level, u32 layer)
+{
+   return cache->num_mip_levels * layer + mip_level;
+}
+
+/**
+ * svga3dsurface_setup_cache - Build a surface cache entry
+ * @size: The surface base level dimensions.
+ * @format: The surface format.
+ * @num_mip_levels: Number of mipmap levels.
+ * @num_layers: Number of layers.
+ * @num_samples: Number of samples.
+ * @cache: Pointer to a struct svga3dsurface_cache object to be filled in.
+ *
+ * Return: Zero on success, -EINVAL on invalid surface layout.
+ */
+static inline int svga3dsurface_setup_cache(const struct drm_vmw_size *size,
+   SVGA3dSurfaceFormat format,
+   u32 num_mip_levels,
+   u32 num_layers,
+   u32 num_samples,
+   struct svga3dsurface_cache *cache)
+{
+   const struct svga3d_surface_desc *desc;
+   u32 i;
+
+   memset(cache, 0, sizeof(*cache));
+   cache->desc = desc = svga3dsurface_get_desc(format);
+   cache->num_mip_levels = num_mip_levels;
+   cache->num_layers = num_layers;
+   for (i = 0; i < cache->num_mip_levels; i++) {
+   struct svga3dsurface_mip *mip = &cache->mip[i];
+
+   mip->size = svga3dsurface_get_mip_size(*size, i);
+   mip->bytes = svga3dsurface_get_image_buffer_size
+   (desc, &mip->size, 0);
+   mip->row_stride =
+   __KERNEL_DIV_ROUND_UP(mip->size.width,
+ desc->block_size.width) *
+   desc->bytes_per_block * num_samples;
+   if (!mip->row_stride)
+   goto invalid_di

[PATCH v4 0/9] Emulated coherent graphics memory

2019-06-11 Thread VMware
Planning to merge this through the drm/vmwgfx tree soon, so if there
are any objections, please speak up.

Graphics APIs like OpenGL 4.4 and Vulkan require the graphics driver
to provide coherent graphics memory, meaning that the GPU sees any
content written to the coherent memory on the next GPU operation that
touches that memory, and the CPU sees any content written by the GPU
to that memory immediately after any fence object trailing the GPU
operation has signaled.

Paravirtual drivers that otherwise require explicit synchronization
need to do this by hooking up dirty tracking to pagefault handlers
and buffer object validation. This is a first attempt to do that for
the vmwgfx driver.

The mm patches have been out for RFC. I think I have addressed all the
feedback I got, except a possible softdirty breakage. But although the
dirty-tracking and softdirty may write-protect PTEs both care about,
that shouldn't really cause any interference, in particular
since we use the hardware dirty PTE bits and softdirty uses other PTE bits.

For the TTM changes they are hopefully in line with the long-term
strategy of making helpers out of what's left of TTM.

The code has been tested and exercised by a tailored version of mesa
where we disable all explicit synchronization and assume graphics memory
is coherent. The performance loss varies of course; a typical number is
around 5%.

Changes v1-v2:
- Addressed a number of typos and formatting issues.
- Added a usage warning for apply_to_pfn_range() and apply_to_page_range()
- Re-evaluated the decision to use apply_to_pfn_range() rather than
  modifying the pagewalk.c. It still looks like generically handling the
  transparent huge page cases requires the mmap_sem to be held at least
  in read mode, so sticking with apply_to_pfn_range() for now.
- The TTM page-fault helper vma copy argument was scratched in favour of
  a pageprot_t argument.
Changes v3:
- Adapted to upstream API changes.
Changes v4:
- Adapted to upstream mmu_notifier changes. (Jerome?)
- Fixed a couple of warnings on 32-bit x86
- Fixed image offset computation on multisample images.
  
Cc: Andrew Morton 
Cc: Matthew Wilcox 
Cc: Will Deacon 
Cc: Peter Zijlstra 
Cc: Rik van Riel 
Cc: Minchan Kim 
Cc: Michal Hocko 
Cc: Huang Ying 
Cc: Souptick Joarder 
Cc: "Jérôme Glisse" 
Cc: "Christian König" 
Cc: linux...@kvack.org



[PATCH v4 7/9] drm/vmwgfx: Use an RBtree instead of linked list for MOB resources

2019-06-11 Thread VMware
From: Thomas Hellstrom 

With emulated coherent memory we need to be able to quickly look up
a resource from the MOB offset. Instead of traversing a linked list with
O(n) worst case, use an RBtree with O(log n) worst case complexity.
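
For the lookup side (not part of this diff), finding the resource with the
highest backup offset not exceeding a given MOB offset is a standard rbtree
walk. A minimal sketch against the structures below; the helper name is
illustrative:

static struct vmw_resource *
vmw_resource_from_offset(struct vmw_buffer_object *vbo, unsigned long offset)
{
        struct rb_node *node = vbo->res_tree.rb_node;
        struct vmw_resource *best = NULL;

        while (node) {
                struct vmw_resource *res =
                        container_of(node, struct vmw_resource, mob_node);

                if (offset < res->backup_offset) {
                        node = node->rb_left;
                } else {
                        /* Best candidate so far; a closer one may sit
                         * in the right subtree. */
                        best = res;
                        node = node->rb_right;
                }
        }
        return best;
}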

Signed-off-by: Thomas Hellstrom 
Reviewed-by: Deepak Rawat 
---
 drivers/gpu/drm/vmwgfx/vmwgfx_bo.c   |  5 ++--
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.h  | 10 +++
 drivers/gpu/drm/vmwgfx/vmwgfx_resource.c | 33 +---
 3 files changed, 32 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
index 90ca866640fe..e8bc7a7ac031 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
@@ -464,6 +464,7 @@ void vmw_bo_bo_free(struct ttm_buffer_object *bo)
struct vmw_buffer_object *vmw_bo = vmw_buffer_object(bo);
 
WARN_ON(vmw_bo->dirty);
+   WARN_ON(!RB_EMPTY_ROOT(&vmw_bo->res_tree));
vmw_bo_unmap(vmw_bo);
kfree(vmw_bo);
 }
@@ -480,6 +481,7 @@ static void vmw_user_bo_destroy(struct ttm_buffer_object *bo)
struct vmw_buffer_object *vbo = &vmw_user_bo->vbo;
 
WARN_ON(vbo->dirty);
+   WARN_ON(!RB_EMPTY_ROOT(&vbo->res_tree));
vmw_bo_unmap(vbo);
ttm_prime_object_kfree(vmw_user_bo, prime);
 }
@@ -515,8 +517,7 @@ int vmw_bo_init(struct vmw_private *dev_priv,
memset(vmw_bo, 0, sizeof(*vmw_bo));
BUILD_BUG_ON(TTM_MAX_BO_PRIORITY <= 3);
vmw_bo->base.priority = 3;
-
-   INIT_LIST_HEAD(&vmw_bo->res_list);
+   vmw_bo->res_tree = RB_ROOT;
 
ret = ttm_bo_init(bdev, &vmw_bo->base, size,
  ttm_bo_type_device, placement,
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index 27c259395790..9b347923196f 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -89,7 +89,7 @@ struct vmw_fpriv {
 /**
  * struct vmw_buffer_object - TTM buffer object with vmwgfx additions
  * @base: The TTM buffer object
- * @res_list: List of resources using this buffer object as a backing MOB
+ * @res_tree: RB tree of resources using this buffer object as a backing MOB
  * @pin_count: pin depth
  * @dx_query_ctx: DX context if this buffer object is used as a DX query MOB
  * @map: Kmap object for semi-persistent mappings
@@ -98,7 +98,7 @@ struct vmw_fpriv {
  */
 struct vmw_buffer_object {
struct ttm_buffer_object base;
-   struct list_head res_list;
+   struct rb_root res_tree;
s32 pin_count;
/* Not ref-counted.  Protected by binding_mutex */
struct vmw_resource *dx_query_ctx;
@@ -146,8 +146,8 @@ struct vmw_res_func;
  * pin-count greater than zero. It is not on the resource LRU lists and its
  * backup buffer is pinned. Hence it can't be evicted.
  * @func: Method vtable for this resource. Immutable.
+ * @mob_node: Node for the MOB backup rbtree. Protected by @backup reserved.
 * @lru_head: List head for the LRU list. Protected by @dev_priv::resource_lock.
- * @mob_head: List head for the MOB backup list. Protected by @backup reserved.
  * @binding_head: List head for the context binding list. Protected by
  * the @dev_priv::binding_mutex
  * @res_free: The resource destructor.
@@ -168,8 +168,8 @@ struct vmw_resource {
unsigned long backup_offset;
unsigned long pin_count;
const struct vmw_res_func *func;
+   struct rb_node mob_node;
struct list_head lru_head;
-   struct list_head mob_head;
struct list_head binding_head;
struct vmw_resource_dirty *dirty;
void (*res_free) (struct vmw_resource *res);
@@ -742,7 +742,7 @@ void vmw_resource_dirty_update(struct vmw_resource *res, pgoff_t start,
  */
 static inline bool vmw_resource_mob_attached(const struct vmw_resource *res)
 {
-   return !list_empty(&res->mob_head);
+   return !RB_EMPTY_NODE(&res->mob_node);
 }
 
 /**
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
index da9afa83fb4f..b84dd5953886 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
@@ -41,11 +41,24 @@
 void vmw_resource_mob_attach(struct vmw_resource *res)
 {
struct vmw_buffer_object *backup = res->backup;
+   struct rb_node **new = &backup->res_tree.rb_node, *parent = NULL;
 
lockdep_assert_held(&backup->base.resv->lock.base);
res->used_prio = (res->res_dirty) ? res->func->dirty_prio :
res->func->prio;
-   list_add_tail(&res->mob_head, &backup->res_list);
+
+   while (*new) {
+   struct vmw_resource *this =
+   container_of(*new, struct vmw_resource, mob_node);
+
+   parent = *new;
+   new = (res->backup_offset < this->backup_offset) ?
+   &((*new)->rb_left) : &((*new)->rb_right);
+   }
+
+   rb_link_node(&res->mob_node, parent, new);
+  

[PATCH v4 8/9] drm/vmwgfx: Implement an infrastructure for read-coherent resources

2019-06-11 Thread VMware
From: Thomas Hellstrom 

Similar to write-coherent resources, make sure that from the user-space
point of view, GPU-rendered content is automatically available for
reading by the CPU.
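
Concretely, the fault handler flushes dirty GPU content covering the
faulted range back to the backing MOB pages before inserting PTEs. A rough
sketch of that call (the full handler is truncated in this archive; only
the vmw_resources_clean() signature below is taken from the patch):

        /* In vmw_bo_vm_fault(), before inserting PTEs: */
        ret = vmw_resources_clean(vbo, page_offset,
                                  page_offset + num_prefault,
                                  &num_prefault);
        /* Resources intersecting the range are read back from the GPU,
         * and num_prefault may be clamped so prefaulting does not cross
         * into a still-dirty resource. */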

Signed-off-by: Thomas Hellstrom 
Reviewed-by: Deepak Rawat 
---
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.h   |   7 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c|  73 -
 drivers/gpu/drm/vmwgfx/vmwgfx_resource.c  | 103 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_resource_priv.h |   2 +
 drivers/gpu/drm/vmwgfx/vmwgfx_validation.c|   3 +-
 5 files changed, 177 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index 9b347923196f..dae3a39bf402 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -689,7 +689,8 @@ extern void vmw_resource_unreference(struct vmw_resource **p_res);
 extern struct vmw_resource *vmw_resource_reference(struct vmw_resource *res);
 extern struct vmw_resource *
 vmw_resource_reference_unless_doomed(struct vmw_resource *res);
-extern int vmw_resource_validate(struct vmw_resource *res, bool intr);
+extern int vmw_resource_validate(struct vmw_resource *res, bool intr,
+bool dirtying);
 extern int vmw_resource_reserve(struct vmw_resource *res, bool interruptible,
bool no_backup);
 extern bool vmw_resource_needs_backup(const struct vmw_resource *res);
@@ -733,6 +734,8 @@ void vmw_resource_mob_attach(struct vmw_resource *res);
 void vmw_resource_mob_detach(struct vmw_resource *res);
 void vmw_resource_dirty_update(struct vmw_resource *res, pgoff_t start,
   pgoff_t end);
+int vmw_resources_clean(struct vmw_buffer_object *vbo, pgoff_t start,
+   pgoff_t end, pgoff_t *num_prefault);
 
 /**
  * vmw_resource_mob_attached - Whether a resource currently has a mob attached
@@ -1427,6 +1430,8 @@ int vmw_bo_dirty_add(struct vmw_buffer_object *vbo);
 void vmw_bo_dirty_transfer_to_res(struct vmw_resource *res);
 void vmw_bo_dirty_clear_res(struct vmw_resource *res);
 void vmw_bo_dirty_release(struct vmw_buffer_object *vbo);
+void vmw_bo_dirty_unmap(struct vmw_buffer_object *vbo,
+   pgoff_t start, pgoff_t end);
 vm_fault_t vmw_bo_vm_fault(struct vm_fault *vmf);
 vm_fault_t vmw_bo_vm_mkwrite(struct vm_fault *vmf);
 
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c b/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c
index 8d154f90bdc0..730c51e397dd 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c
@@ -153,7 +153,6 @@ static void vmw_bo_dirty_scan_mkwrite(struct vmw_buffer_object *vbo)
}
 }
 
-
 /**
  * vmw_bo_dirty_scan - Scan for dirty pages and add them to the dirty
  * tracking structure
@@ -171,6 +170,51 @@ void vmw_bo_dirty_scan(struct vmw_buffer_object *vbo)
vmw_bo_dirty_scan_mkwrite(vbo);
 }
 
+/**
+ * vmw_bo_dirty_pre_unmap - write-protect and pick up dirty pages before
+ * an unmap_mapping_range operation.
+ * @vbo: The buffer object.
+ * @start: First page of the range within the buffer object.
+ * @end: Last page of the range within the buffer object + 1.
+ *
+ * If we're using the _PAGETABLE scan method, we may leak dirty pages
+ * when calling unmap_mapping_range(). This function makes sure we pick
+ * up all dirty pages.
+ */
+static void vmw_bo_dirty_pre_unmap(struct vmw_buffer_object *vbo,
+  pgoff_t start, pgoff_t end)
+{
+   struct vmw_bo_dirty *dirty = vbo->dirty;
+   unsigned long offset = drm_vma_node_start(&vbo->base.vma_node);
+   struct address_space *mapping = vbo->base.bdev->dev_mapping;
+
+   if (dirty->method != VMW_BO_DIRTY_PAGETABLE || start >= end)
+   return;
+
+   apply_as_wrprotect(mapping, start + offset, end - start);
+   apply_as_clean(mapping, start + offset, end - start, offset,
+  &dirty->bitmap[0], &dirty->start, &dirty->end);
+}
+
+/**
+ * vmw_bo_dirty_unmap - Clear all ptes pointing to a range within a bo
+ * @vbo: The buffer object,
+ * @start: First page of the range within the buffer object.
+ * @end: Last page of the range within the buffer object + 1.
+ *
+ * This is similar to ttm_bo_unmap_virtual_locked() except it takes a subrange.
+ */
+void vmw_bo_dirty_unmap(struct vmw_buffer_object *vbo,
+   pgoff_t start, pgoff_t end)
+{
+   unsigned long offset = drm_vma_node_start(&vbo->base.vma_node);
+   struct address_space *mapping = vbo->base.bdev->dev_mapping;
+
+   vmw_bo_dirty_pre_unmap(vbo, start, end);
+   unmap_shared_mapping_range(mapping, (offset + start) << PAGE_SHIFT,
+  (loff_t) (end - start) << PAGE_SHIFT);
+}
+
 /**
  * vmw_bo_dirty_add - Add a dirty-tracking user to a buffer object
  * @vbo: The buffer object
@@ -389,21 +433,40 @@ vm_fault_t vmw_bo_vm_fault(stru

[PATCH v4 5/9] drm/ttm: TTM fault handler helpers

2019-06-11 Thread VMware
From: Thomas Hellstrom 

With the vmwgfx dirty tracking, the default TTM fault handler is not
completely sufficient (vmwgfx needs to modify the vma->vm_flags member,
and also needs to restrict the number of prefaults).

We also want to replicate the new ttm_bo_vm_reserve() functionality.

So start turning the TTM vm code into helpers: ttm_bo_vm_fault_reserved()
and ttm_bo_vm_reserve(), and provide a default TTM fault handler for other
drivers to use.
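
A driver's customized fault handler then becomes a thin wrapper around the
two helpers. A minimal sketch (it mirrors the default handler; the vmwgfx
one in this series additionally adjusts vm_flags and the prefault count,
and TTM_BO_VM_NUM_PREFAULT is assumed to move to a shared header):

static vm_fault_t my_fault_sketch(struct vm_fault *vmf)
{
        struct ttm_buffer_object *bo = vmf->vma->vm_private_data;
        vm_fault_t ret;

        ret = ttm_bo_vm_reserve(bo, vmf);
        if (ret)
                return ret;

        ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot,
                                       TTM_BO_VM_NUM_PREFAULT);
        if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
                return ret;

        reservation_object_unlock(bo->resv);
        return ret;
}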

Cc: "Christian König" 

Signed-off-by: Thomas Hellstrom 
Reviewed-by: "Christian König"  #v1
---
 drivers/gpu/drm/ttm/ttm_bo_vm.c | 175 +++-
 include/drm/ttm/ttm_bo_api.h|  10 ++
 2 files changed, 113 insertions(+), 72 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
index 196e13a0adad..2d9862fcf6fd 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -42,8 +42,6 @@
 #include 
 #include 
 
-#define TTM_BO_VM_NUM_PREFAULT 16
-
 static vm_fault_t ttm_bo_vm_fault_idle(struct ttm_buffer_object *bo,
struct vm_fault *vmf)
 {
@@ -106,31 +104,30 @@ static unsigned long ttm_bo_io_mem_pfn(struct ttm_buffer_object *bo,
+ page_offset;
 }
 
-static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
+/**
+ * ttm_bo_vm_reserve - Reserve a buffer object in a retryable vm callback
+ * @bo: The buffer object
+ * @vmf: The fault structure handed to the callback
+ *
+ * vm callbacks like fault() and *_mkwrite() allow for the mm_sem to be dropped
+ * during long waits, and after the wait the callback will be restarted. This
+ * is to allow other threads using the same virtual memory space concurrent
+ * access to map(), unmap() completely unrelated buffer objects. TTM buffer
+ * object reservations sometimes wait for GPU and should therefore be
+ * considered long waits. This function reserves the buffer object interruptibly
+ * taking this into account. Starvation is avoided by the vm system not
+ * allowing too many repeated restarts.
+ * This function is intended to be used in customized fault() and _mkwrite()
+ * handlers.
+ *
+ * Return:
+ *0 on success and the bo was reserved.
+ *VM_FAULT_RETRY if blocking wait.
+ *VM_FAULT_NOPAGE if blocking wait and retrying was not allowed.
+ */
+vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo,
+struct vm_fault *vmf)
 {
-   struct vm_area_struct *vma = vmf->vma;
-   struct ttm_buffer_object *bo = (struct ttm_buffer_object *)
-   vma->vm_private_data;
-   struct ttm_bo_device *bdev = bo->bdev;
-   unsigned long page_offset;
-   unsigned long page_last;
-   unsigned long pfn;
-   struct ttm_tt *ttm = NULL;
-   struct page *page;
-   int err;
-   int i;
-   vm_fault_t ret = VM_FAULT_NOPAGE;
-   unsigned long address = vmf->address;
-   struct ttm_mem_type_manager *man =
-   &bdev->man[bo->mem.mem_type];
-   struct vm_area_struct cvma;
-
-   /*
-* Work around locking order reversal in fault / nopfn
-* between mmap_sem and bo_reserve: Perform a trylock operation
-* for reserve, and if it fails, retry the fault after waiting
-* for the buffer to become unreserved.
-*/
if (unlikely(!reservation_object_trylock(bo->resv))) {
if (vmf->flags & FAULT_FLAG_ALLOW_RETRY) {
if (!(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) {
@@ -151,14 +148,55 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
return VM_FAULT_NOPAGE;
}
 
+   return 0;
+}
+EXPORT_SYMBOL(ttm_bo_vm_reserve);
+
+/**
+ * ttm_bo_vm_fault_reserved - TTM fault helper
+ * @vmf: The struct vm_fault given as argument to the fault callback
+ * @prot: The page protection to be used for this memory area.
+ * @num_prefault: Maximum number of prefault pages. The caller may want to
+ * specify this based on madvise settings and the size of the GPU object
+ * backed by the memory.
+ *
+ * This function inserts one or more page table entries pointing to the
+ * memory backing the buffer object, and then returns a return code
+ * instructing the caller to retry the page access.
+ *
+ * Return:
+ *   VM_FAULT_NOPAGE on success or pending signal
+ *   VM_FAULT_SIGBUS on unspecified error
+ *   VM_FAULT_OOM on out-of-memory
+ *   VM_FAULT_RETRY if retryable wait
+ */
+vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf,
+   pgprot_t prot,
+   pgoff_t num_prefault)
+{
+   struct vm_area_struct *vma = vmf->vma;
+   struct vm_area_struct cvma = *vma;
+   struct ttm_buffer_object *bo = (struct ttm_buffer_object *)
+   vma->vm_private_data;
+   struct ttm_bo_device *bdev = bo->bdev;
+   unsigned long page_offset;
+   unsigned long page_last;
+   unsigned long pfn;
+   struct ttm_tt *ttm = NULL;
+   struct p

[PATCH v4 4/9] drm/ttm: Allow the driver to provide the ttm struct vm_operations_struct

2019-06-11 Thread VMware
From: Thomas Hellstrom 

Add a pointer to the struct vm_operations_struct in the bo_device, and
assign that pointer to the default value currently used.

The driver can then optionally modify that pointer and the new value
can be used for each new vma created.
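
A minimal sketch of a driver overriding the callbacks (the vmwgfx usage
later in this series keeps the copy in its device-private structure; the
handler names here are illustrative):

        /* After ttm_bo_device_init() has set bdev->vm_ops to the default: */
        dev_priv->vm_ops = *dev_priv->bdev.vm_ops;  /* copy TTM defaults */
        dev_priv->vm_ops.fault = my_driver_fault;
        dev_priv->vm_ops.page_mkwrite = my_driver_mkwrite;
        dev_priv->bdev.vm_ops = &dev_priv->vm_ops;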

Cc: "Christian König" 

Signed-off-by: Thomas Hellstrom 
Reviewed-by: Christian König 
---
 drivers/gpu/drm/ttm/ttm_bo.c| 1 +
 drivers/gpu/drm/ttm/ttm_bo_vm.c | 6 +++---
 include/drm/ttm/ttm_bo_driver.h | 6 ++
 3 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index c7de667d482a..6953dd264172 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -1739,6 +1739,7 @@ int ttm_bo_device_init(struct ttm_bo_device *bdev,
mutex_lock(&ttm_global_mutex);
list_add_tail(&bdev->device_list, &glob->device_list);
mutex_unlock(&ttm_global_mutex);
+   bdev->vm_ops = &ttm_bo_vm_ops;
 
return 0;
 out_no_sys:
diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
index 6dacff49c1cc..196e13a0adad 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -395,7 +395,7 @@ static int ttm_bo_vm_access(struct vm_area_struct *vma, unsigned long addr,
return ret;
 }
 
-static const struct vm_operations_struct ttm_bo_vm_ops = {
+const struct vm_operations_struct ttm_bo_vm_ops = {
.fault = ttm_bo_vm_fault,
.open = ttm_bo_vm_open,
.close = ttm_bo_vm_close,
@@ -448,7 +448,7 @@ int ttm_bo_mmap(struct file *filp, struct vm_area_struct *vma,
if (unlikely(ret != 0))
goto out_unref;
 
-   vma->vm_ops = &ttm_bo_vm_ops;
+   vma->vm_ops = bdev->vm_ops;
 
/*
 * Note: We're transferring the bo reference to
@@ -480,7 +480,7 @@ int ttm_fbdev_mmap(struct vm_area_struct *vma, struct ttm_buffer_object *bo)
 
ttm_bo_get(bo);
 
-   vma->vm_ops = &ttm_bo_vm_ops;
+   vma->vm_ops = bo->bdev->vm_ops;
vma->vm_private_data = bo;
vma->vm_flags |= VM_MIXEDMAP;
vma->vm_flags |= VM_IO | VM_DONTEXPAND;
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index c9b8ba492f24..a2d810a2504d 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -442,6 +442,9 @@ extern struct ttm_bo_global {
  * @driver: Pointer to a struct ttm_bo_driver struct setup by the driver.
  * @man: An array of mem_type_managers.
  * @vma_manager: Address space manager
+ * @vm_ops: Pointer to the struct vm_operations_struct used for this
+ * device's VM operations. The driver may override this before the first
+ * mmap() call.
  * lru_lock: Spinlock that protects the buffer+device lru lists and
  * ddestroy lists.
  * @dev_mapping: A pointer to the struct address_space representing the
@@ -460,6 +463,7 @@ struct ttm_bo_device {
struct ttm_bo_global *glob;
struct ttm_bo_driver *driver;
struct ttm_mem_type_manager man[TTM_NUM_MEM_TYPES];
+   const struct vm_operations_struct *vm_ops;
 
/*
 * Protected by internal locks.
@@ -488,6 +492,8 @@ struct ttm_bo_device {
bool no_retry;
 };
 
+extern const struct vm_operations_struct ttm_bo_vm_ops;
+
 /**
  * struct ttm_lru_bulk_move_pos
  *
-- 
2.20.1



Re: [PATCH v4 3/9] mm: Add write-protect and clean utilities for address space ranges

2019-06-11 Thread VMware

Hi, Nadav,

On 6/11/19 7:21 PM, Nadav Amit wrote:

On Jun 11, 2019, at 5:24 AM, Thomas Hellström (VMware) wrote:

From: Thomas Hellstrom 


[ snip ]


+/**
+ * apply_pt_wrprotect - Leaf pte callback to write-protect a pte
+ * @pte: Pointer to the pte
+ * @token: Page table token, see apply_to_pfn_range()
+ * @addr: The virtual page address
+ * @closure: Pointer to a struct pfn_range_apply embedded in a
+ * struct apply_as
+ *
+ * The function write-protects a pte and records the range in
+ * virtual address space of touched ptes for efficient range TLB flushes.
+ *
+ * Return: Always zero.
+ */
+static int apply_pt_wrprotect(pte_t *pte, pgtable_t token,
+ unsigned long addr,
+ struct pfn_range_apply *closure)
+{
+   struct apply_as *aas = container_of(closure, typeof(*aas), base);
+   pte_t ptent = *pte;
+
+   if (pte_write(ptent)) {
+   pte_t old_pte = ptep_modify_prot_start(aas->vma, addr, pte);
+
+   ptent = pte_wrprotect(old_pte);
+   ptep_modify_prot_commit(aas->vma, addr, pte, old_pte, ptent);
+   aas->total++;
+   aas->start = min(aas->start, addr);
+   aas->end = max(aas->end, addr + PAGE_SIZE);
+   }
+
+   return 0;
+}
+
+/**
+ * struct apply_as_clean - Closure structure for apply_as_clean
+ * @base: struct apply_as we derive from
+ * @bitmap_pgoff: Address_space Page offset of the first bit in @bitmap
+ * @bitmap: Bitmap with one bit for each page offset in the address_space range
+ * covered.
+ * @start: Address_space page offset of first modified pte relative
+ * to @bitmap_pgoff
+ * @end: Address_space page offset of last modified pte relative
+ * to @bitmap_pgoff
+ */
+struct apply_as_clean {
+   struct apply_as base;
+   pgoff_t bitmap_pgoff;
+   unsigned long *bitmap;
+   pgoff_t start;
+   pgoff_t end;
+};
+
+/**
+ * apply_pt_clean - Leaf pte callback to clean a pte
+ * @pte: Pointer to the pte
+ * @token: Page table token, see apply_to_pfn_range()
+ * @addr: The virtual page address
+ * @closure: Pointer to a struct pfn_range_apply embedded in a
+ * struct apply_as_clean
+ *
+ * The function cleans a pte and records the range in
+ * virtual address space of touched ptes for efficient TLB flushes.
+ * It also records dirty ptes in a bitmap representing page offsets
+ * in the address_space, as well as the first and last of the bits
+ * touched.
+ *
+ * Return: Always zero.
+ */
+static int apply_pt_clean(pte_t *pte, pgtable_t token,
+ unsigned long addr,
+ struct pfn_range_apply *closure)
+{
+   struct apply_as *aas = container_of(closure, typeof(*aas), base);
+   struct apply_as_clean *clean = container_of(aas, typeof(*clean), base);
+   pte_t ptent = *pte;
+
+   if (pte_dirty(ptent)) {
+   pgoff_t pgoff = ((addr - aas->vma->vm_start) >> PAGE_SHIFT) +
+   aas->vma->vm_pgoff - clean->bitmap_pgoff;
+   pte_t old_pte = ptep_modify_prot_start(aas->vma, addr, pte);
+
+   ptent = pte_mkclean(old_pte);
+   ptep_modify_prot_commit(aas->vma, addr, pte, old_pte, ptent);
+
+   aas->total++;
+   aas->start = min(aas->start, addr);
+   aas->end = max(aas->end, addr + PAGE_SIZE);
+
+   __set_bit(pgoff, clean->bitmap);
+   clean->start = min(clean->start, pgoff);
+   clean->end = max(clean->end, pgoff + 1);
+   }
+
+   return 0;

Usually, when a PTE is write-protected, or when a dirty-bit is cleared, the
TLB flush must be done while the page-table lock for that specific table is
taken (i.e., within apply_pt_clean() and apply_pt_wrprotect() in this case).

Otherwise, in the case of apply_pt_clean() for example, another core might
shortly after (before the TLB flush) write to the same page whose PTE was
changed. The dirty-bit in such case might not be set, and the change get
lost.


Hmm. Let's assume that was the case, we have two possible situations:

A: pt_clean

1. That core's TLB entry is invalid. It will set the PTE dirty bit and 
continue. The dirty bit will probably remain set after the TLB flush.
2. That core's TLB entry is valid. It will just continue. The dirty bit 
will remain clear after the TLB flush.


But I fail to see how having the TLB flush within the page table lock 
would help in this case. Since the writing core will never attempt to 
take it? In any case, if such a race occurs, the corresponding bit in 
the bitmap would have been set and we've recorded that the page is dirty.


B: wrprotect situation, the situation is a bit different:

1. That core's TLB entry is invalid. It will read the PTE, cause a fault 
and block in mkwrite() on an external address space lock which is held 
over this operation. (Is

Re: [PATCH v4 3/9] mm: Add write-protect and clean utilities for address space ranges

2019-06-11 Thread VMware

On 6/11/19 9:10 PM, Nadav Amit wrote:

On Jun 11, 2019, at 11:26 AM, Thomas Hellström (VMware) wrote:

Hi, Nadav,

On 6/11/19 7:21 PM, Nadav Amit wrote:

On Jun 11, 2019, at 5:24 AM, Thomas Hellström (VMware) wrote:

From: Thomas Hellstrom 

[ snip ]


+/**
+ * apply_pt_wrprotect - Leaf pte callback to write-protect a pte
+ * @pte: Pointer to the pte
+ * @token: Page table token, see apply_to_pfn_range()
+ * @addr: The virtual page address
+ * @closure: Pointer to a struct pfn_range_apply embedded in a
+ * struct apply_as
+ *
+ * The function write-protects a pte and records the range in
+ * virtual address space of touched ptes for efficient range TLB flushes.
+ *
+ * Return: Always zero.
+ */
+static int apply_pt_wrprotect(pte_t *pte, pgtable_t token,
+ unsigned long addr,
+ struct pfn_range_apply *closure)
+{
+   struct apply_as *aas = container_of(closure, typeof(*aas), base);
+   pte_t ptent = *pte;
+
+   if (pte_write(ptent)) {
+   pte_t old_pte = ptep_modify_prot_start(aas->vma, addr, pte);
+
+   ptent = pte_wrprotect(old_pte);
+   ptep_modify_prot_commit(aas->vma, addr, pte, old_pte, ptent);
+   aas->total++;
+   aas->start = min(aas->start, addr);
+   aas->end = max(aas->end, addr + PAGE_SIZE);
+   }
+
+   return 0;
+}
+
+/**
+ * struct apply_as_clean - Closure structure for apply_as_clean
+ * @base: struct apply_as we derive from
+ * @bitmap_pgoff: Address_space Page offset of the first bit in @bitmap
+ * @bitmap: Bitmap with one bit for each page offset in the address_space range
+ * covered.
+ * @start: Address_space page offset of first modified pte relative
+ * to @bitmap_pgoff
+ * @end: Address_space page offset of last modified pte relative
+ * to @bitmap_pgoff
+ */
+struct apply_as_clean {
+   struct apply_as base;
+   pgoff_t bitmap_pgoff;
+   unsigned long *bitmap;
+   pgoff_t start;
+   pgoff_t end;
+};
+
+/**
+ * apply_pt_clean - Leaf pte callback to clean a pte
+ * @pte: Pointer to the pte
+ * @token: Page table token, see apply_to_pfn_range()
+ * @addr: The virtual page address
+ * @closure: Pointer to a struct pfn_range_apply embedded in a
+ * struct apply_as_clean
+ *
+ * The function cleans a pte and records the range in
+ * virtual address space of touched ptes for efficient TLB flushes.
+ * It also records dirty ptes in a bitmap representing page offsets
+ * in the address_space, as well as the first and last of the bits
+ * touched.
+ *
+ * Return: Always zero.
+ */
+static int apply_pt_clean(pte_t *pte, pgtable_t token,
+ unsigned long addr,
+ struct pfn_range_apply *closure)
+{
+   struct apply_as *aas = container_of(closure, typeof(*aas), base);
+   struct apply_as_clean *clean = container_of(aas, typeof(*clean), base);
+   pte_t ptent = *pte;
+
+   if (pte_dirty(ptent)) {
+   pgoff_t pgoff = ((addr - aas->vma->vm_start) >> PAGE_SHIFT) +
+   aas->vma->vm_pgoff - clean->bitmap_pgoff;
+   pte_t old_pte = ptep_modify_prot_start(aas->vma, addr, pte);
+
+   ptent = pte_mkclean(old_pte);
+   ptep_modify_prot_commit(aas->vma, addr, pte, old_pte, ptent);
+
+   aas->total++;
+   aas->start = min(aas->start, addr);
+   aas->end = max(aas->end, addr + PAGE_SIZE);
+
+   __set_bit(pgoff, clean->bitmap);
+   clean->start = min(clean->start, pgoff);
+   clean->end = max(clean->end, pgoff + 1);
+   }
+
+   return 0;

Usually, when a PTE is write-protected, or when a dirty-bit is cleared, the
TLB flush must be done while the page-table lock for that specific table is
taken (i.e., within apply_pt_clean() and apply_pt_wrprotect() in this case).

Otherwise, in the case of apply_pt_clean() for example, another core might
shortly after (before the TLB flush) write to the same page whose PTE was
changed. The dirty-bit in such case might not be set, and the change get
lost.

Hmm. Let's assume that was the case, we have two possible situations:

A: pt_clean

1. That core's TLB entry is invalid. It will set the PTE dirty bit and 
continue. The dirty bit will probably remain set after the TLB flush.

I guess you mean the PTE is not cached in the TLB.

Yes.



2. That core's TLB entry is valid. It will just continue. The dirty bit will 
remain clear after the TLB flush.

But I fail to see how having the TLB flush within the page table lock would 
help in this case. Since the writing core will never attempt to take it? In any 
case, if such a race occurs, the corresponding bit in the bitmap would have 
been set and we've recorded that the page is dirty.

I don’t understand. What do you mean “recorded that the page is dirty”?

[PATCH 1/2] drm/vmwgfx: Use the backdoor port if the HB port is not available

2019-06-11 Thread VMware
From: Thomas Hellstrom 

The HB port may not be available for various reasons. Either it has been
disabled by a config option, or the hypervisor has disabled it for other
reasons. In that case, make sure we have a backup plan and use the backdoor
port instead, at a performance penalty.

Cc: sta...@vger.kernel.org
Fixes: 89da76fde68d ("drm/vmwgfx: Add VMWare host messaging capability")
Signed-off-by: Thomas Hellstrom 
Reviewed-by: Deepak Rawat 
---
 drivers/gpu/drm/vmwgfx/vmwgfx_msg.c | 146 ++--
 1 file changed, 117 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c b/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c
index 8b9270f31409..e4e09d47c5c0 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c
@@ -136,6 +136,114 @@ static int vmw_close_channel(struct rpc_channel *channel)
return 0;
 }
 
+/**
+ * vmw_port_hb_out - Send the message payload either through the
+ * high-bandwidth port if available, or through the backdoor otherwise.
+ * @channel: The rpc channel.
+ * @msg: NULL-terminated message.
+ * @hb: Whether the high-bandwidth port is available.
+ *
+ * Return: The port status.
+ */
+static unsigned long vmw_port_hb_out(struct rpc_channel *channel,
+const char *msg, bool hb)
+{
+   unsigned long si, di, eax, ebx, ecx, edx;
+   unsigned long msg_len = strlen(msg);
+
+   if (hb) {
+   unsigned long bp = channel->cookie_high;
+
+   si = (uintptr_t) msg;
+   di = channel->cookie_low;
+
+   VMW_PORT_HB_OUT(
+   (MESSAGE_STATUS_SUCCESS << 16) | VMW_PORT_CMD_HB_MSG,
+   msg_len, si, di,
+   VMW_HYPERVISOR_HB_PORT | (channel->channel_id << 16),
+   VMW_HYPERVISOR_MAGIC, bp,
+   eax, ebx, ecx, edx, si, di);
+
+   return ebx;
+   }
+
+   /* HB port not available. Send the message 4 bytes at a time. */
+   ecx = MESSAGE_STATUS_SUCCESS << 16;
+   while (msg_len && (HIGH_WORD(ecx) & MESSAGE_STATUS_SUCCESS)) {
+   unsigned int bytes = min_t(size_t, msg_len, 4);
+   unsigned long word = 0;
+
+   memcpy(&word, msg, bytes);
+   msg_len -= bytes;
+   msg += bytes;
+   si = channel->cookie_high;
+   di = channel->cookie_low;
+
+   VMW_PORT(VMW_PORT_CMD_MSG | (MSG_TYPE_SENDPAYLOAD << 16),
+word, si, di,
+VMW_HYPERVISOR_PORT | (channel->channel_id << 16),
+VMW_HYPERVISOR_MAGIC,
+eax, ebx, ecx, edx, si, di);
+   }
+
+   return ecx;
+}
+
+/**
+ * vmw_port_hb_in - Receive the message payload either through the
+ * high-bandwidth port if available, or through the backdoor otherwise.
+ * @channel: The rpc channel.
+ * @reply: Pointer to buffer holding reply.
+ * @reply_len: Length of the reply.
+ * @hb: Whether the high-bandwidth port is available.
+ *
+ * Return: The port status.
+ */
+static unsigned long vmw_port_hb_in(struct rpc_channel *channel, char *reply,
+   unsigned long reply_len, bool hb)
+{
+   unsigned long si, di, eax, ebx, ecx, edx;
+
+   if (hb) {
+   unsigned long bp = channel->cookie_low;
+
+   si = channel->cookie_high;
+   di = (uintptr_t) reply;
+
+   VMW_PORT_HB_IN(
+   (MESSAGE_STATUS_SUCCESS << 16) | VMW_PORT_CMD_HB_MSG,
+   reply_len, si, di,
+   VMW_HYPERVISOR_HB_PORT | (channel->channel_id << 16),
+   VMW_HYPERVISOR_MAGIC, bp,
+   eax, ebx, ecx, edx, si, di);
+
+   return ebx;
+   }
+
+   /* HB port not available. Retrieve the message 4 bytes at a time. */
+   ecx = MESSAGE_STATUS_SUCCESS << 16;
+   while (reply_len) {
+   unsigned int bytes = min_t(unsigned long, reply_len, 4);
+
+   si = channel->cookie_high;
+   di = channel->cookie_low;
+
+   VMW_PORT(VMW_PORT_CMD_MSG | (MSG_TYPE_RECVPAYLOAD << 16),
+MESSAGE_STATUS_SUCCESS, si, di,
+VMW_HYPERVISOR_PORT | (channel->channel_id << 16),
+VMW_HYPERVISOR_MAGIC,
+eax, ebx, ecx, edx, si, di);
+
+   if ((HIGH_WORD(ecx) & MESSAGE_STATUS_SUCCESS) == 0)
+   break;
+
+   memcpy(reply, &ebx, bytes);
+   reply_len -= bytes;
+   reply += bytes;
+   }
+
+   return ecx;
+}
 
 
 /**
@@ -148,11 +256,10 @@ static int vmw_close_channel(struct rpc_channel *channel)
  */
 static int vmw_send_msg(struct rpc_channel *c

[PATCH 2/2] drm/vmwgfx: Honor the sg list segment size limitation

2019-06-11 Thread VMware
From: Thomas Hellstrom 

When building sg tables, honor the device sg list segment size limitation.

Signed-off-by: Thomas Hellstrom 
Reviewed-by: Deepak Rawat 
---
 drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
index a6ea75b58a83..d8ea3dd10af0 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
@@ -441,11 +441,11 @@ static int vmw_ttm_map_dma(struct vmw_ttm_tt *vmw_tt)
if (unlikely(ret != 0))
return ret;
 
-   ret = sg_alloc_table_from_pages(&vmw_tt->sgt, vsgt->pages,
-   vsgt->num_pages, 0,
-   (unsigned long)
-   vsgt->num_pages << PAGE_SHIFT,
-   GFP_KERNEL);
+   ret = __sg_alloc_table_from_pages
+   (&vmw_tt->sgt, vsgt->pages, vsgt->num_pages, 0,
+(unsigned long) vsgt->num_pages << PAGE_SHIFT,
+dma_get_max_seg_size(dev_priv->dev->dev),
+GFP_KERNEL);
if (unlikely(ret != 0))
goto out_sg_alloc_fail;
 
-- 
2.20.1


[PATCH v5 6/9] drm/vmwgfx: Implement an infrastructure for write-coherent resources

2019-06-11 Thread VMware
 Emulate coherency by tracking vm accesses.
  * @backup: The backup buffer if any. Protected by resource reserved.
  * @backup_offset: Offset into the backup buffer if any. Protected by resource
  * reserved. Note that only a few resource types can have a @backup_offset
@@ -151,14 +154,16 @@ struct vmw_res_func;
  * @hw_destroy: Callback to destroy the resource on the device, as part of
  * resource destruction.
  */
+struct vmw_resource_dirty;
 struct vmw_resource {
struct kref kref;
struct vmw_private *dev_priv;
int id;
u32 used_prio;
unsigned long backup_size;
-   bool res_dirty;
-   bool backup_dirty;
+   u32 res_dirty : 1;
+   u32 backup_dirty : 1;
+   u32 coherent : 1;
struct vmw_buffer_object *backup;
unsigned long backup_offset;
unsigned long pin_count;
@@ -166,6 +171,7 @@ struct vmw_resource {
struct list_head lru_head;
struct list_head mob_head;
struct list_head binding_head;
+   struct vmw_resource_dirty *dirty;
void (*res_free) (struct vmw_resource *res);
void (*hw_destroy) (struct vmw_resource *res);
 };
@@ -606,6 +612,9 @@ struct vmw_private {
 
/* Validation memory reservation */
struct vmw_validation_mem vvm;
+
+   /* VM operations */
+   struct vm_operations_struct vm_ops;
 };
 
 static inline struct vmw_surface *vmw_res_to_srf(struct vmw_resource *res)
@@ -722,6 +731,8 @@ extern void vmw_resource_evict_all(struct vmw_private 
*dev_priv);
 extern void vmw_resource_unbind_list(struct vmw_buffer_object *vbo);
 void vmw_resource_mob_attach(struct vmw_resource *res);
 void vmw_resource_mob_detach(struct vmw_resource *res);
+void vmw_resource_dirty_update(struct vmw_resource *res, pgoff_t start,
+  pgoff_t end);
 
 /**
  * vmw_resource_mob_attached - Whether a resource currently has a mob attached
@@ -1410,6 +1421,15 @@ int vmw_host_log(const char *log);
 #define VMW_DEBUG_USER(fmt, ...)  \
DRM_DEBUG_DRIVER(fmt, ##__VA_ARGS__)
 
+/* Resource dirtying - vmwgfx_page_dirty.c */
+void vmw_bo_dirty_scan(struct vmw_buffer_object *vbo);
+int vmw_bo_dirty_add(struct vmw_buffer_object *vbo);
+void vmw_bo_dirty_transfer_to_res(struct vmw_resource *res);
+void vmw_bo_dirty_clear_res(struct vmw_resource *res);
+void vmw_bo_dirty_release(struct vmw_buffer_object *vbo);
+vm_fault_t vmw_bo_vm_fault(struct vm_fault *vmf);
+vm_fault_t vmw_bo_vm_mkwrite(struct vm_fault *vmf);
+
 /**
  * Inline helper functions
  */
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
index 33533d126277..319c1ca35663 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
@@ -2560,7 +2560,6 @@ static int vmw_cmd_dx_check_subresource(struct 
vmw_private *dev_priv,
 offsetof(typeof(*cmd), sid));
 
cmd = container_of(header, typeof(*cmd), header);
-
return vmw_cmd_res_check(dev_priv, sw_context, vmw_res_surface,
 VMW_RES_DIRTY_NONE, user_surface_converter,
 &cmd->sid, NULL);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c
new file mode 100644
index ..8d154f90bdc0
--- /dev/null
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c
@@ -0,0 +1,409 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/******
+ *
+ * Copyright 2019 VMware, Inc., Palo Alto, CA., USA
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **/
+#include "vmwgfx_drv.h"
+
+/*
+ * Different methods for tracking dirty:
+ * VMW_BO_DIRTY_PAG

[PATCH v5 8/9] drm/vmwgfx: Implement an infrastructure for read-coherent resources

2019-06-11 Thread VMware
From: Thomas Hellstrom 

Similar to write-coherent resources, make sure that from the user-space
point of view, GPU rendered contents is automatically available for
reading by the CPU.

Signed-off-by: Thomas Hellstrom 
Reviewed-by: Deepak Rawat 
---
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.h   |   7 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c|  73 -
 drivers/gpu/drm/vmwgfx/vmwgfx_resource.c  | 103 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_resource_priv.h |   2 +
 drivers/gpu/drm/vmwgfx/vmwgfx_validation.c|   3 +-
 5 files changed, 177 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h 
b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index 9b347923196f..dae3a39bf402 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -689,7 +689,8 @@ extern void vmw_resource_unreference(struct vmw_resource 
**p_res);
 extern struct vmw_resource *vmw_resource_reference(struct vmw_resource *res);
 extern struct vmw_resource *
 vmw_resource_reference_unless_doomed(struct vmw_resource *res);
-extern int vmw_resource_validate(struct vmw_resource *res, bool intr);
+extern int vmw_resource_validate(struct vmw_resource *res, bool intr,
+bool dirtying);
 extern int vmw_resource_reserve(struct vmw_resource *res, bool interruptible,
bool no_backup);
 extern bool vmw_resource_needs_backup(const struct vmw_resource *res);
@@ -733,6 +734,8 @@ void vmw_resource_mob_attach(struct vmw_resource *res);
 void vmw_resource_mob_detach(struct vmw_resource *res);
 void vmw_resource_dirty_update(struct vmw_resource *res, pgoff_t start,
   pgoff_t end);
+int vmw_resources_clean(struct vmw_buffer_object *vbo, pgoff_t start,
+   pgoff_t end, pgoff_t *num_prefault);
 
 /**
  * vmw_resource_mob_attached - Whether a resource currently has a mob attached
@@ -1427,6 +1430,8 @@ int vmw_bo_dirty_add(struct vmw_buffer_object *vbo);
 void vmw_bo_dirty_transfer_to_res(struct vmw_resource *res);
 void vmw_bo_dirty_clear_res(struct vmw_resource *res);
 void vmw_bo_dirty_release(struct vmw_buffer_object *vbo);
+void vmw_bo_dirty_unmap(struct vmw_buffer_object *vbo,
+   pgoff_t start, pgoff_t end);
 vm_fault_t vmw_bo_vm_fault(struct vm_fault *vmf);
 vm_fault_t vmw_bo_vm_mkwrite(struct vm_fault *vmf);
 
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c
index 8d154f90bdc0..730c51e397dd 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c
@@ -153,7 +153,6 @@ static void vmw_bo_dirty_scan_mkwrite(struct 
vmw_buffer_object *vbo)
}
 }
 
-
 /**
  * vmw_bo_dirty_scan - Scan for dirty pages and add them to the dirty
  * tracking structure
@@ -171,6 +170,51 @@ void vmw_bo_dirty_scan(struct vmw_buffer_object *vbo)
vmw_bo_dirty_scan_mkwrite(vbo);
 }
 
+/**
+ * vmw_bo_dirty_pre_unmap - write-protect and pick up dirty pages before
+ * an unmap_mapping_range operation.
+ * @vbo: The buffer object,
+ * @start: First page of the range within the buffer object.
+ * @end: Last page of the range within the buffer object + 1.
+ *
+ * If we're using the _PAGETABLE scan method, we may leak dirty pages
+ * when calling unmap_mapping_range(). This function makes sure we pick
+ * up all dirty pages.
+ */
+static void vmw_bo_dirty_pre_unmap(struct vmw_buffer_object *vbo,
+  pgoff_t start, pgoff_t end)
+{
+   struct vmw_bo_dirty *dirty = vbo->dirty;
+   unsigned long offset = drm_vma_node_start(&vbo->base.vma_node);
+   struct address_space *mapping = vbo->base.bdev->dev_mapping;
+
+   if (dirty->method != VMW_BO_DIRTY_PAGETABLE || start >= end)
+   return;
+
+   apply_as_wrprotect(mapping, start + offset, end - start);
+   apply_as_clean(mapping, start + offset, end - start, offset,
+  &dirty->bitmap[0], &dirty->start, &dirty->end);
+}
+
+/**
+ * vmw_bo_dirty_unmap - Clear all ptes pointing to a range within a bo
+ * @vbo: The buffer object,
+ * @start: First page of the range within the buffer object.
+ * @end: Last page of the range within the buffer object + 1.
+ *
+ * This is similar to ttm_bo_unmap_virtual_locked() except it takes a subrange.
+ */
+void vmw_bo_dirty_unmap(struct vmw_buffer_object *vbo,
+   pgoff_t start, pgoff_t end)
+{
+   unsigned long offset = drm_vma_node_start(&vbo->base.vma_node);
+   struct address_space *mapping = vbo->base.bdev->dev_mapping;
+
+   vmw_bo_dirty_pre_unmap(vbo, start, end);
+   unmap_shared_mapping_range(mapping, (offset + start) << PAGE_SHIFT,
+  (loff_t) (end - start) << PAGE_SHIFT);
+}
+
 /**
  * vmw_bo_dirty_add - Add a dirty-tracking user to a buffer object
  * @vbo: The buffer object
@@ -389,21 +433,40 @@ vm_fault_t vmw_bo_vm_fault(stru

[PATCH v5 9/9] drm/vmwgfx: Add surface dirty-tracking callbacks

2019-06-11 Thread VMware
From: Thomas Hellstrom 

Add the callbacks necessary to implement emulated coherent memory for
surfaces. Add a flag to the gb_surface_create ioctl to indicate that
surface memory should be coherent.
Also bump the drm minor version to signal the availability of coherent
surfaces.
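
For context, a minimal user-space sketch of requesting a coherent surface
through the extended gb_surface_create ioctl. This is not part of the
patch; the flag and ioctl names are assumed to match the updated
include/uapi/drm/vmwgfx_drm.h, so treat it as an illustration rather than
authoritative usage:

#include <xf86drm.h>
#include <drm/vmwgfx_drm.h>

/* Hedged sketch: request emulated coherent backing memory for a
 * guest-backed surface. Error handling and the remaining request
 * setup are elided. */
static int create_coherent_surface(int fd,
                                   union drm_vmw_gb_surface_create_ext_arg *arg)
{
        /* Assumed flag name signalling coherent surface memory. */
        arg->req.base.drm_surface_flags |= drm_vmw_surface_flag_coherent;

        return drmCommandWriteRead(fd, DRM_VMW_GB_SURFACE_CREATE_EXT,
                                   arg, sizeof(*arg));
}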

Signed-off-by: Thomas Hellstrom 
Reviewed-by: Deepak Rawat 
---
 .../device_include/svga3d_surfacedefs.h   | 233 ++-
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.h   |   4 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_surface.c   | 395 +-
 include/uapi/drm/vmwgfx_drm.h |   4 +-
 4 files changed, 629 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/vmwgfx/device_include/svga3d_surfacedefs.h 
b/drivers/gpu/drm/vmwgfx/device_include/svga3d_surfacedefs.h
index f2bfd3d80598..61414f105c67 100644
--- a/drivers/gpu/drm/vmwgfx/device_include/svga3d_surfacedefs.h
+++ b/drivers/gpu/drm/vmwgfx/device_include/svga3d_surfacedefs.h
@@ -1280,7 +1280,6 @@ svga3dsurface_get_pixel_offset(SVGA3dSurfaceFormat format,
return offset;
 }
 
-
 static inline u32
 svga3dsurface_get_image_offset(SVGA3dSurfaceFormat format,
   surf_size_struct baseLevelSize,
@@ -1375,4 +1374,236 @@ 
svga3dsurface_is_screen_target_format(SVGA3dSurfaceFormat format)
return svga3dsurface_is_dx_screen_target_format(format);
 }
 
+/**
+ * struct svga3dsurface_mip - Mipmap level information
+ * @bytes: Bytes required in the backing store of this mipmap level.
+ * @img_stride: Byte stride per image.
+ * @row_stride: Byte stride per block row.
+ * @size: The size of the mipmap.
+ */
+struct svga3dsurface_mip {
+   size_t bytes;
+   size_t img_stride;
+   size_t row_stride;
+   struct drm_vmw_size size;
+};
+
+/**
+ * struct svga3dsurface_cache - Cached surface information
+ * @desc: Pointer to the surface descriptor
+ * @mip: Array of mipmap level information. Valid size is @num_mip_levels.
+ * @mip_chain_bytes: Bytes required in the backing store for the whole chain
+ * of mip levels.
+ * @sheet_bytes: Bytes required in the backing store for a sheet
+ * representing a single sample.
+ * @num_mip_levels: Valid size of the @mip array. Number of mipmap levels in
+ * a chain.
+ * @num_layers: Number of slices in an array texture or number of faces in
+ * a cubemap texture.
+ */
+struct svga3dsurface_cache {
+   const struct svga3d_surface_desc *desc;
+   struct svga3dsurface_mip mip[DRM_VMW_MAX_MIP_LEVELS];
+   size_t mip_chain_bytes;
+   size_t sheet_bytes;
+   u32 num_mip_levels;
+   u32 num_layers;
+};
+
+/**
+ * struct svga3dsurface_loc - Surface location
+ * @sub_resource: Surface subresource. Defined as layer * num_mip_levels +
+ * mip_level.
+ * @x: X coordinate.
+ * @y: Y coordinate.
+ * @z: Z coordinate.
+ */
+struct svga3dsurface_loc {
+   u32 sub_resource;
+   u32 x, y, z;
+};
+
+/**
+ * svga3dsurface_subres - Compute the subresource from layer and mipmap.
+ * @cache: Surface layout data.
+ * @mip_level: The mipmap level.
+ * @layer: The surface layer (face or array slice).
+ *
+ * Return: The subresource.
+ */
+static inline u32 svga3dsurface_subres(const struct svga3dsurface_cache *cache,
+  u32 mip_level, u32 layer)
+{
+   return cache->num_mip_levels * layer + mip_level;
+}
+
+/**
+ * svga3dsurface_setup_cache - Build a surface cache entry
+ * @size: The surface base level dimensions.
+ * @format: The surface format.
+ * @num_mip_levels: Number of mipmap levels.
+ * @num_layers: Number of layers.
+ * @num_samples: Number of samples.
+ * @cache: Pointer to a struct svga3dsurface_cache object to be filled in.
+ *
+ * Return: Zero on success, -EINVAL on invalid surface layout.
+ */
+static inline int svga3dsurface_setup_cache(const struct drm_vmw_size *size,
+   SVGA3dSurfaceFormat format,
+   u32 num_mip_levels,
+   u32 num_layers,
+   u32 num_samples,
+   struct svga3dsurface_cache *cache)
+{
+   const struct svga3d_surface_desc *desc;
+   u32 i;
+
+   memset(cache, 0, sizeof(*cache));
+   cache->desc = desc = svga3dsurface_get_desc(format);
+   cache->num_mip_levels = num_mip_levels;
+   cache->num_layers = num_layers;
+   for (i = 0; i < cache->num_mip_levels; i++) {
+   struct svga3dsurface_mip *mip = &cache->mip[i];
+
+   mip->size = svga3dsurface_get_mip_size(*size, i);
+   mip->bytes = svga3dsurface_get_image_buffer_size
+   (desc, &mip->size, 0);
+   mip->row_stride =
+   __KERNEL_DIV_ROUND_UP(mip->size.width,
+ desc->block_size.width) *
+   desc->bytes_per_block * num_samples;
+   if (!mip->row_stride)
+   goto invalid_di

[PATCH v5 7/9] drm/vmwgfx: Use an RBtree instead of linked list for MOB resources

2019-06-11 Thread VMware
From: Thomas Hellstrom 

With emulated coherent memory we need to be able to quickly look up
a resource from the MOB offset. Instead of traversing a linked list with
O(n) worst case, use an RBtree with O(log n) worst case complexity.
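
To illustrate why the tree is keyed on backup_offset, a minimal lookup
sketch follows. The helper below is hypothetical and not part of the
patch; it only shows the O(log n) descent the tree enables:

/* Hypothetical sketch: find the resource with the largest
 * backup_offset that is <= offset, mirroring the ordering used by
 * vmw_resource_mob_attach() in this patch. */
static struct vmw_resource *
vmw_resource_mob_lookup(struct vmw_buffer_object *vbo, unsigned long offset)
{
        struct rb_node *node = vbo->res_tree.rb_node;
        struct vmw_resource *found = NULL;

        while (node) {
                struct vmw_resource *res =
                        container_of(node, struct vmw_resource, mob_node);

                if (offset < res->backup_offset) {
                        node = node->rb_left;
                } else {
                        /* Candidate; a better one may sit to the right. */
                        found = res;
                        node = node->rb_right;
                }
        }

        return found;
}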

Signed-off-by: Thomas Hellstrom 
Reviewed-by: Deepak Rawat 
---
 drivers/gpu/drm/vmwgfx/vmwgfx_bo.c   |  5 ++--
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.h  | 10 +++
 drivers/gpu/drm/vmwgfx/vmwgfx_resource.c | 33 +---
 3 files changed, 32 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
index 90ca866640fe..e8bc7a7ac031 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
@@ -464,6 +464,7 @@ void vmw_bo_bo_free(struct ttm_buffer_object *bo)
struct vmw_buffer_object *vmw_bo = vmw_buffer_object(bo);
 
WARN_ON(vmw_bo->dirty);
+   WARN_ON(!RB_EMPTY_ROOT(&vmw_bo->res_tree));
vmw_bo_unmap(vmw_bo);
kfree(vmw_bo);
 }
@@ -480,6 +481,7 @@ static void vmw_user_bo_destroy(struct ttm_buffer_object 
*bo)
struct vmw_buffer_object *vbo = &vmw_user_bo->vbo;
 
WARN_ON(vbo->dirty);
+   WARN_ON(!RB_EMPTY_ROOT(&vbo->res_tree));
vmw_bo_unmap(vbo);
ttm_prime_object_kfree(vmw_user_bo, prime);
 }
@@ -515,8 +517,7 @@ int vmw_bo_init(struct vmw_private *dev_priv,
memset(vmw_bo, 0, sizeof(*vmw_bo));
BUILD_BUG_ON(TTM_MAX_BO_PRIORITY <= 3);
vmw_bo->base.priority = 3;
-
-   INIT_LIST_HEAD(&vmw_bo->res_list);
+   vmw_bo->res_tree = RB_ROOT;
 
ret = ttm_bo_init(bdev, &vmw_bo->base, size,
  ttm_bo_type_device, placement,
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h 
b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index 27c259395790..9b347923196f 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -89,7 +89,7 @@ struct vmw_fpriv {
 /**
  * struct vmw_buffer_object - TTM buffer object with vmwgfx additions
  * @base: The TTM buffer object
- * @res_list: List of resources using this buffer object as a backing MOB
+ * @res_tree: RB tree of resources using this buffer object as a backing MOB
  * @pin_count: pin depth
  * @dx_query_ctx: DX context if this buffer object is used as a DX query MOB
  * @map: Kmap object for semi-persistent mappings
@@ -98,7 +98,7 @@ struct vmw_fpriv {
  */
 struct vmw_buffer_object {
struct ttm_buffer_object base;
-   struct list_head res_list;
+   struct rb_root res_tree;
s32 pin_count;
/* Not ref-counted.  Protected by binding_mutex */
struct vmw_resource *dx_query_ctx;
@@ -146,8 +146,8 @@ struct vmw_res_func;
  * pin-count greater than zero. It is not on the resource LRU lists and its
  * backup buffer is pinned. Hence it can't be evicted.
  * @func: Method vtable for this resource. Immutable.
+ * @mob_node: Node for the MOB backup rbtree. Protected by @backup reserved.
  * @lru_head: List head for the LRU list. Protected by 
@dev_priv::resource_lock.
- * @mob_head: List head for the MOB backup list. Protected by @backup reserved.
  * @binding_head: List head for the context binding list. Protected by
  * the @dev_priv::binding_mutex
  * @res_free: The resource destructor.
@@ -168,8 +168,8 @@ struct vmw_resource {
unsigned long backup_offset;
unsigned long pin_count;
const struct vmw_res_func *func;
+   struct rb_node mob_node;
struct list_head lru_head;
-   struct list_head mob_head;
struct list_head binding_head;
struct vmw_resource_dirty *dirty;
void (*res_free) (struct vmw_resource *res);
@@ -742,7 +742,7 @@ void vmw_resource_dirty_update(struct vmw_resource *res, 
pgoff_t start,
  */
 static inline bool vmw_resource_mob_attached(const struct vmw_resource *res)
 {
-   return !list_empty(&res->mob_head);
+   return !RB_EMPTY_NODE(&res->mob_node);
 }
 
 /**
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
index da9afa83fb4f..b84dd5953886 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
@@ -41,11 +41,24 @@
 void vmw_resource_mob_attach(struct vmw_resource *res)
 {
struct vmw_buffer_object *backup = res->backup;
+   struct rb_node **new = &backup->res_tree.rb_node, *parent = NULL;
 
lockdep_assert_held(&backup->base.resv->lock.base);
res->used_prio = (res->res_dirty) ? res->func->dirty_prio :
res->func->prio;
-   list_add_tail(&res->mob_head, &backup->res_list);
+
+   while (*new) {
+   struct vmw_resource *this =
+   container_of(*new, struct vmw_resource, mob_node);
+
+   parent = *new;
+   new = (res->backup_offset < this->backup_offset) ?
+   &((*new)->rb_left) : &((*new)->rb_right);
+   }
+
+   rb_link_node(&res->mob_node, parent, new);
+  

[PATCH v5 2/9] mm: Add an apply_to_pfn_range interface

2019-06-11 Thread VMware
From: Thomas Hellstrom 

This is basically apply_to_page_range with added functionality:
Allocating missing parts of the page table becomes optional, which
means that the function can be guaranteed not to error if allocation
is disabled. Also passing of the closure struct and callback function
becomes different and more in line with how things are done elsewhere.

Finally we keep apply_to_page_range as a wrapper around apply_to_pfn_range

The reason for not using the page-walk code is that we want to perform
the page-walk on vmas pointing to an address space without requiring the
mmap_sem to be held rather than on vmas belonging to a process with the
mmap_sem held.
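
To make the closure convention concrete, here is a minimal sketch of an
in-kernel caller deriving from struct pfn_range_apply (the names are made
up for illustration; only the struct and apply_to_pfn_range() come from
this patch):

/* Illustrative: count present ptes in a range without allocating
 * missing page-table parts. */
struct count_closure {
        struct pfn_range_apply base;    /* Embedded; we derive from it. */
        unsigned long num_present;
};

static int count_pte(pte_t *pte, pgtable_t token, unsigned long addr,
                     struct pfn_range_apply *pfn_apply)
{
        struct count_closure *cc =
                container_of(pfn_apply, struct count_closure, base);

        if (pte_present(*pte))
                cc->num_present++;

        return 0;
}

static unsigned long count_present_ptes(struct mm_struct *mm,
                                        unsigned long addr,
                                        unsigned long size)
{
        struct count_closure cc = {
                .base = {
                        .mm = mm,
                        .ptefn = count_pte,
                        .alloc = 0,     /* Skip missing page tables. */
                },
        };

        apply_to_pfn_range(&cc.base, addr, size);

        return cc.num_present;
}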

Notable changes since RFC:
Don't export apply_to_pfn range.

Cc: Andrew Morton 
Cc: Matthew Wilcox 
Cc: Will Deacon 
Cc: Peter Zijlstra 
Cc: Rik van Riel 
Cc: Minchan Kim 
Cc: Michal Hocko 
Cc: Huang Ying 
Cc: Souptick Joarder 
Cc: "Jérôme Glisse" 
Cc: linux...@kvack.org
Cc: linux-ker...@vger.kernel.org

Signed-off-by: Thomas Hellstrom 
Reviewed-by: Ralph Campbell  #v1
---
 include/linux/mm.h |  10 
 mm/memory.c| 135 ++---
 2 files changed, 113 insertions(+), 32 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 0e8834ac32b7..3d06ce2a64af 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2675,6 +2675,16 @@ typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, 
unsigned long addr,
 extern int apply_to_page_range(struct mm_struct *mm, unsigned long address,
   unsigned long size, pte_fn_t fn, void *data);
 
+struct pfn_range_apply;
+typedef int (*pter_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr,
+struct pfn_range_apply *closure);
+struct pfn_range_apply {
+   struct mm_struct *mm;
+   pter_fn_t ptefn;
+   unsigned int alloc;
+};
+extern int apply_to_pfn_range(struct pfn_range_apply *closure,
+ unsigned long address, unsigned long size);
 
 #ifdef CONFIG_PAGE_POISONING
 extern bool page_poisoning_enabled(void);
diff --git a/mm/memory.c b/mm/memory.c
index 168f546af1ad..462aa47f8878 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2032,18 +2032,17 @@ int vm_iomap_memory(struct vm_area_struct *vma, 
phys_addr_t start, unsigned long
 }
 EXPORT_SYMBOL(vm_iomap_memory);
 
-static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd,
-unsigned long addr, unsigned long end,
-pte_fn_t fn, void *data)
+static int apply_to_pte_range(struct pfn_range_apply *closure, pmd_t *pmd,
+ unsigned long addr, unsigned long end)
 {
pte_t *pte;
int err;
pgtable_t token;
spinlock_t *uninitialized_var(ptl);
 
-   pte = (mm == &init_mm) ?
+   pte = (closure->mm == &init_mm) ?
pte_alloc_kernel(pmd, addr) :
-   pte_alloc_map_lock(mm, pmd, addr, &ptl);
+   pte_alloc_map_lock(closure->mm, pmd, addr, &ptl);
if (!pte)
return -ENOMEM;
 
@@ -2054,86 +2053,109 @@ static int apply_to_pte_range(struct mm_struct *mm, 
pmd_t *pmd,
token = pmd_pgtable(*pmd);
 
do {
-   err = fn(pte++, token, addr, data);
+   err = closure->ptefn(pte++, token, addr, closure);
if (err)
break;
} while (addr += PAGE_SIZE, addr != end);
 
arch_leave_lazy_mmu_mode();
 
-   if (mm != &init_mm)
+   if (closure->mm != &init_mm)
pte_unmap_unlock(pte-1, ptl);
return err;
 }
 
-static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud,
-unsigned long addr, unsigned long end,
-pte_fn_t fn, void *data)
+static int apply_to_pmd_range(struct pfn_range_apply *closure, pud_t *pud,
+ unsigned long addr, unsigned long end)
 {
pmd_t *pmd;
unsigned long next;
-   int err;
+   int err = 0;
 
BUG_ON(pud_huge(*pud));
 
-   pmd = pmd_alloc(mm, pud, addr);
+   pmd = pmd_alloc(closure->mm, pud, addr);
if (!pmd)
return -ENOMEM;
+
do {
next = pmd_addr_end(addr, end);
-   err = apply_to_pte_range(mm, pmd, addr, next, fn, data);
+   if (!closure->alloc && pmd_none_or_clear_bad(pmd))
+   continue;
+   err = apply_to_pte_range(closure, pmd, addr, next);
if (err)
break;
} while (pmd++, addr = next, addr != end);
return err;
 }
 
-static int apply_to_pud_range(struct mm_struct *mm, p4d_t *p4d,
-unsigned long addr, unsigned long end,
-pte_fn_t fn, void *data)
+static int apply_to_pud_range(struct pfn_range_apply *closure, p4d_t *p4d,
+  

[PATCH v5 3/9] mm: Add write-protect and clean utilities for address space ranges

2019-06-11 Thread VMware
From: Thomas Hellstrom 

Add two utilities to a) write-protect and b) clean all ptes pointing into
a range of an address space.
The utilities are intended to aid in tracking dirty pages (either
driver-allocated system memory or pci device memory).
The write-protect utility should be used in conjunction with
page_mkwrite() and pfn_mkwrite() to trigger write page-faults on page
accesses. Typically one would want to use this on sparse accesses into
large memory regions. The clean utility should be used to utilize
hardware dirtying functionality and avoid the overhead of page-faults,
typically on large accesses into small memory regions.
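
A hedged sketch of the intended calling pattern (the driver-side names
and the tracker struct are illustrative; only apply_as_wrprotect() and
apply_as_clean() come from this patch):

/* Illustrative dirty tracker for a driver-owned address-space range. */
struct my_dirty_tracker {
        unsigned long *bitmap;  /* One bit per page in the range. */
        pgoff_t start, end;     /* Dirty span, updated by apply_as_clean(). */
};

/* Arm fault-based tracking: subsequent writes trigger mkwrite(). */
static void my_track_by_faults(struct address_space *mapping,
                               pgoff_t first_index, pgoff_t nr)
{
        apply_as_wrprotect(mapping, first_index, nr);
}

/* Harvest hardware dirty bits, e.g. for large accesses into a small
 * region. Bit 0 of the bitmap corresponds to page first_index here. */
static void my_track_by_dirty_bits(struct address_space *mapping,
                                   struct my_dirty_tracker *t,
                                   pgoff_t first_index, pgoff_t nr)
{
        apply_as_clean(mapping, first_index, nr, first_index,
                       t->bitmap, &t->start, &t->end);
}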

The added file "as_dirty_helpers.c" is initially listed as maintained by
VMware under our DRM driver. If somebody would like it elsewhere,
that's of course no problem.

Notable changes since RFC:
- Added comments to help avoid the usage of these function for VMAs
  it's not intended for. We also do advisory checks on the vm_flags and
  warn on illegal usage.
- Perform the pte modifications the same way softdirty does.
- Add mmu_notifier range invalidation calls.
- Add a config option so that this code is not unconditionally included.
- Tell the mmu_gather code about pending tlb flushes.

Cc: Andrew Morton 
Cc: Matthew Wilcox 
Cc: Will Deacon 
Cc: Peter Zijlstra 
Cc: Rik van Riel 
Cc: Minchan Kim 
Cc: Michal Hocko 
Cc: Huang Ying 
Cc: Souptick Joarder 
Cc: "Jérôme Glisse" 
Cc: linux...@kvack.org
Cc: linux-ker...@vger.kernel.org

Signed-off-by: Thomas Hellstrom 
Reviewed-by: Ralph Campbell  #v1
---
 MAINTAINERS   |   1 +
 include/linux/mm.h|   9 +-
 mm/Kconfig|   3 +
 mm/Makefile   |   1 +
 mm/as_dirty_helpers.c | 300 ++
 5 files changed, 313 insertions(+), 1 deletion(-)
 create mode 100644 mm/as_dirty_helpers.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 7a2f487ea49a..a55d4ef91b0b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5179,6 +5179,7 @@ T:git git://people.freedesktop.org/~thomash/linux
 S: Supported
 F: drivers/gpu/drm/vmwgfx/
 F: include/uapi/drm/vmwgfx_drm.h
+F: mm/as_dirty_helpers.c
 
 DRM DRIVERS
 M: David Airlie 
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 3d06ce2a64af..a0bc2a82917e 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2685,7 +2685,14 @@ struct pfn_range_apply {
 };
 extern int apply_to_pfn_range(struct pfn_range_apply *closure,
  unsigned long address, unsigned long size);
-
+unsigned long apply_as_wrprotect(struct address_space *mapping,
+pgoff_t first_index, pgoff_t nr);
+unsigned long apply_as_clean(struct address_space *mapping,
+pgoff_t first_index, pgoff_t nr,
+pgoff_t bitmap_pgoff,
+unsigned long *bitmap,
+pgoff_t *start,
+pgoff_t *end);
 #ifdef CONFIG_PAGE_POISONING
 extern bool page_poisoning_enabled(void);
 extern void kernel_poison_pages(struct page *page, int numpages, int enable);
diff --git a/mm/Kconfig b/mm/Kconfig
index f0c76ba47695..5006d0e6a5c7 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -765,4 +765,7 @@ config GUP_BENCHMARK
 config ARCH_HAS_PTE_SPECIAL
bool
 
+config AS_DIRTY_HELPERS
+bool
+
 endmenu
diff --git a/mm/Makefile b/mm/Makefile
index ac5e5ba78874..f5d412bbc2f7 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -104,3 +104,4 @@ obj-$(CONFIG_HARDENED_USERCOPY) += usercopy.o
 obj-$(CONFIG_PERCPU_STATS) += percpu-stats.o
 obj-$(CONFIG_HMM) += hmm.o
 obj-$(CONFIG_MEMFD_CREATE) += memfd.o
+obj-$(CONFIG_AS_DIRTY_HELPERS) += as_dirty_helpers.o
diff --git a/mm/as_dirty_helpers.c b/mm/as_dirty_helpers.c
new file mode 100644
index ..0da7a8eed51a
--- /dev/null
+++ b/mm/as_dirty_helpers.c
@@ -0,0 +1,300 @@
+// SPDX-License-Identifier: GPL-2.0
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+/**
+ * struct apply_as - Closure structure for apply_as_range
+ * @base: struct pfn_range_apply we derive from
+ * @start: Address of first modified pte
+ * @end: Address of last modified pte + 1
+ * @total: Total number of modified ptes
+ * @vma: Pointer to the struct vm_area_struct we're currently operating on
+ */
+struct apply_as {
+   struct pfn_range_apply base;
+   unsigned long start;
+   unsigned long end;
+   unsigned long total;
+   struct vm_area_struct *vma;
+};
+
+/**
+ * apply_pt_wrprotect - Leaf pte callback to write-protect a pte
+ * @pte: Pointer to the pte
+ * @token: Page table token, see apply_to_pfn_range()
+ * @addr: The virtual page address
+ * @closure: Pointer to a struct pfn_range_apply embedded in a
+ * struct apply_as
+ *
+ * The function write-protects a pte and records the range in
+ * virtual address space of touched ptes for efficient range TLB flushes.
+ *
+ * Return: Always zero.

[PATCH v5 5/9] drm/ttm: TTM fault handler helpers

2019-06-11 Thread VMware
From: Thomas Hellstrom 

With the vmwgfx dirty tracking, the default TTM fault handler is not
completely sufficient (vmwgfx needs to modify the vma->vm_flags member,
and also needs to restrict the number of prefaults).

We also want to replicate the new ttm_bo_vm_reserve() functionality.

So start turning the TTM vm code into helpers: ttm_bo_vm_fault_reserved()
and ttm_bo_vm_reserve(), and provide a default TTM fault handler for other
drivers to use.
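
A sketch of a driver fault handler built from the two helpers. It mirrors
what the default TTM handler ends up doing; the reduced prefault count is
only an example of the driver-side policy the helpers enable:

static vm_fault_t my_driver_vm_fault(struct vm_fault *vmf)
{
        struct vm_area_struct *vma = vmf->vma;
        struct ttm_buffer_object *bo = vma->vm_private_data;
        pgprot_t prot;
        vm_fault_t ret;

        ret = ttm_bo_vm_reserve(bo, vmf);
        if (ret)
                return ret;

        prot = vm_get_page_prot(vma->vm_flags);
        /* Example policy: prefault a single page, e.g. for dirty tracking. */
        ret = ttm_bo_vm_fault_reserved(vmf, prot, 1);
        if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
                return ret;     /* The bo was already unreserved. */

        reservation_object_unlock(bo->resv);

        return ret;
}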

Cc: "Christian König" 

Signed-off-by: Thomas Hellstrom 
Reviewed-by: "Christian König"  #v1
---
 drivers/gpu/drm/ttm/ttm_bo_vm.c | 175 +++-
 include/drm/ttm/ttm_bo_api.h|  10 ++
 2 files changed, 113 insertions(+), 72 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
index 196e13a0adad..2d9862fcf6fd 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -42,8 +42,6 @@
 #include 
 #include 
 
-#define TTM_BO_VM_NUM_PREFAULT 16
-
 static vm_fault_t ttm_bo_vm_fault_idle(struct ttm_buffer_object *bo,
struct vm_fault *vmf)
 {
@@ -106,31 +104,30 @@ static unsigned long ttm_bo_io_mem_pfn(struct 
ttm_buffer_object *bo,
+ page_offset;
 }
 
-static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
+/**
+ * ttm_bo_vm_reserve - Reserve a buffer object in a retryable vm callback
+ * @bo: The buffer object
+ * @vmf: The fault structure handed to the callback
+ *
+ * vm callbacks like fault() and *_mkwrite() allow for the mmap_sem to be dropped
+ * during long waits, and after the wait the callback will be restarted. This
+ * is to allow other threads using the same virtual memory space concurrent
+ * access to map(), unmap() completely unrelated buffer objects. TTM buffer
+ * object reservations sometimes wait for GPU and should therefore be
+ * considered long waits. This function reserves the buffer object 
interruptibly
+ * taking this into account. Starvation is avoided by the vm system not
+ * allowing too many repeated restarts.
+ * This function is intended to be used in customized fault() and _mkwrite()
+ * handlers.
+ *
+ * Return:
+ * 0 on success and the bo was reserved.
+ * VM_FAULT_RETRY if blocking wait.
+ * VM_FAULT_NOPAGE if blocking wait and retrying was not allowed.
+ */
+vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo,
+struct vm_fault *vmf)
 {
-   struct vm_area_struct *vma = vmf->vma;
-   struct ttm_buffer_object *bo = (struct ttm_buffer_object *)
-   vma->vm_private_data;
-   struct ttm_bo_device *bdev = bo->bdev;
-   unsigned long page_offset;
-   unsigned long page_last;
-   unsigned long pfn;
-   struct ttm_tt *ttm = NULL;
-   struct page *page;
-   int err;
-   int i;
-   vm_fault_t ret = VM_FAULT_NOPAGE;
-   unsigned long address = vmf->address;
-   struct ttm_mem_type_manager *man =
-   &bdev->man[bo->mem.mem_type];
-   struct vm_area_struct cvma;
-
-   /*
-* Work around locking order reversal in fault / nopfn
-* between mmap_sem and bo_reserve: Perform a trylock operation
-* for reserve, and if it fails, retry the fault after waiting
-* for the buffer to become unreserved.
-*/
if (unlikely(!reservation_object_trylock(bo->resv))) {
if (vmf->flags & FAULT_FLAG_ALLOW_RETRY) {
if (!(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) {
@@ -151,14 +148,55 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
return VM_FAULT_NOPAGE;
}
 
+   return 0;
+}
+EXPORT_SYMBOL(ttm_bo_vm_reserve);
+
+/**
+ * ttm_bo_vm_fault_reserved - TTM fault helper
+ * @vmf: The struct vm_fault given as argument to the fault callback
+ * @prot: The page protection to be used for this memory area.
+ * @num_prefault: Maximum number of prefault pages. The caller may want to
+ * specify this based on madvise settings and the size of the GPU object
+ * backed by the memory.
+ *
+ * This function inserts one or more page table entries pointing to the
+ * memory backing the buffer object, and then returns a return code
+ * instructing the caller to retry the page access.
+ *
+ * Return:
+ *   VM_FAULT_NOPAGE on success or pending signal
+ *   VM_FAULT_SIGBUS on unspecified error
+ *   VM_FAULT_OOM on out-of-memory
+ *   VM_FAULT_RETRY if retryable wait
+ */
+vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf,
+   pgprot_t prot,
+   pgoff_t num_prefault)
+{
+   struct vm_area_struct *vma = vmf->vma;
+   struct vm_area_struct cvma = *vma;
+   struct ttm_buffer_object *bo = (struct ttm_buffer_object *)
+   vma->vm_private_data;
+   struct ttm_bo_device *bdev = bo->bdev;
+   unsigned long page_offset;
+   unsigned long page_last;
+   unsigned long pfn;
+   struct ttm_tt *ttm = NULL;
+   struct p

[PATCH v5 0/9] Emulated coherent graphics memory

2019-06-11 Thread VMware
Planning to merge this through the drm/vmwgfx tree soon, so if there
are any objections, please speak up.

Graphics APIs like OpenGL 4.4 and Vulkan require the graphics driver
to provide coherent graphics memory, meaning that the GPU sees any
content written to the coherent memory on the next GPU operation that
touches that memory, and the CPU sees any content written by the GPU
to that memory immediately after any fence object trailing the GPU
operation has signaled.

Paravirtual drivers that otherwise require explicit synchronization
need to do this by hooking up dirty tracking to pagefault handlers
and buffer object validation. This is a first attempt to do that for
the vmwgfx driver.

The mm patches have been out for RFC. I think I have addressed all the
feedback I got, except a possible softdirty breakage. But although the
dirty-tracking and softdirty mechanisms may write-protect PTEs they both
care about, that shouldn't really interfere with either of them, in
particular since we use the hardware dirty PTE bits and softdirty uses other PTE bits.

For the TTM changes they are hopefully in line with the long-term
strategy of making helpers out of what's left of TTM.

The code has been tested and exercised by a tailored version of mesa
where we disable all explicit synchronization and assume graphics memory
is coherent. The performance loss varies of course; a typical number is
around 5%.

Changes v1-v2:
- Addressed a number of typos and formatting issues.
- Added a usage warning for apply_to_pfn_range() and apply_to_page_range()
- Re-evaluated the decision to use apply_to_pfn_range() rather than
  modifying the pagewalk.c. It still looks like generically handling the
  transparent huge page cases requires the mmap_sem to be held at least
  in read mode, so sticking with apply_to_pfn_range() for now.
- The TTM page-fault helper vma copy argument was scratched in favour of
  a pageprot_t argument.
Changes v3:
- Adapted to upstream API changes.
Changes v4:
- Adapted to upstream mmu_notifier changes. (Jerome?)
- Fixed a couple of warnings on 32-bit x86
- Fixed image offset computation on multisample images.
Changes v5:
- Updated usage warning in patch 3/9 after review comments from Nadav Amit.
  
Cc: Andrew Morton 
Cc: Matthew Wilcox 
Cc: Will Deacon 
Cc: Peter Zijlstra 
Cc: Rik van Riel 
Cc: Minchan Kim 
Cc: Michal Hocko 
Cc: Huang Ying 
Cc: Souptick Joarder 
Cc: "Jérôme Glisse" 
Cc: "Christian König" 
Cc: linux...@kvack.org

[PATCH v5 1/9] mm: Allow the [page|pfn]_mkwrite callbacks to drop the mmap_sem

2019-06-11 Thread VMware
From: Thomas Hellstrom 

Driver fault callbacks are allowed to drop the mmap_sem when expecting
long hardware waits to avoid blocking other mm users. Allow the mkwrite
callbacks to do the same by returning early on VM_FAULT_RETRY.

In particular we want to be able to drop the mmap_sem when waiting for
a reservation object lock on a GPU buffer object. These locks may be
held while waiting for the GPU.
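
As an illustration, a pfn_mkwrite() callback can now use the same
trylock-or-retry pattern fault() handlers use. The sketch below is
hypothetical driver code, not part of this patch; a complete handler
would also hold a reference on the object across the wait:

static vm_fault_t my_pfn_mkwrite(struct vm_fault *vmf)
{
        struct my_buffer_object *bo = vmf->vma->vm_private_data;

        if (!reservation_object_trylock(bo->resv)) {
                if ((vmf->flags & FAULT_FLAG_ALLOW_RETRY) &&
                    !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) {
                        /* Drop the mmap_sem during the long wait ... */
                        up_read(&vmf->vma->vm_mm->mmap_sem);
                        if (!reservation_object_lock_interruptible(bo->resv,
                                                                   NULL))
                                reservation_object_unlock(bo->resv);
                        /* ... and have the whole fault restarted. */
                        return VM_FAULT_RETRY;
                }
                return VM_FAULT_NOPAGE;
        }

        /* Mark the range dirty / writable here. */
        reservation_object_unlock(bo->resv);

        return 0;
}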

Cc: Andrew Morton 
Cc: Matthew Wilcox 
Cc: Will Deacon 
Cc: Peter Zijlstra 
Cc: Rik van Riel 
Cc: Minchan Kim 
Cc: Michal Hocko 
Cc: Huang Ying 
Cc: Souptick Joarder 
Cc: "Jérôme Glisse" 
Cc: linux...@kvack.org
Cc: linux-ker...@vger.kernel.org

Signed-off-by: Thomas Hellstrom 
Reviewed-by: Ralph Campbell 
---
 mm/memory.c | 10 ++
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/mm/memory.c b/mm/memory.c
index ddf20bd0c317..168f546af1ad 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2238,7 +2238,7 @@ static vm_fault_t do_page_mkwrite(struct vm_fault *vmf)
ret = vmf->vma->vm_ops->page_mkwrite(vmf);
/* Restore original flags so that caller is not surprised */
vmf->flags = old_flags;
-   if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))
+   if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
return ret;
if (unlikely(!(ret & VM_FAULT_LOCKED))) {
lock_page(page);
@@ -2515,7 +2515,7 @@ static vm_fault_t wp_pfn_shared(struct vm_fault *vmf)
pte_unmap_unlock(vmf->pte, vmf->ptl);
vmf->flags |= FAULT_FLAG_MKWRITE;
ret = vma->vm_ops->pfn_mkwrite(vmf);
-   if (ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE))
+   if (ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))
return ret;
return finish_mkwrite_fault(vmf);
}
@@ -2536,7 +2536,8 @@ static vm_fault_t wp_page_shared(struct vm_fault *vmf)
pte_unmap_unlock(vmf->pte, vmf->ptl);
tmp = do_page_mkwrite(vmf);
if (unlikely(!tmp || (tmp &
- (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))) {
+ (VM_FAULT_ERROR | VM_FAULT_NOPAGE |
+  VM_FAULT_RETRY)))) {
put_page(vmf->page);
return tmp;
}
@@ -3601,7 +3602,8 @@ static vm_fault_t do_shared_fault(struct vm_fault *vmf)
unlock_page(vmf->page);
tmp = do_page_mkwrite(vmf);
if (unlikely(!tmp ||
-   (tmp & (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))) {
+   (tmp & (VM_FAULT_ERROR | VM_FAULT_NOPAGE |
+   VM_FAULT_RETRY)))) {
put_page(vmf->page);
return tmp;
}
-- 
2.20.1



[PATCH v5 4/9] drm/ttm: Allow the driver to provide the ttm struct vm_operations_struct

2019-06-11 Thread VMware
From: Thomas Hellstrom 

Add a pointer to the struct vm_operations_struct in the bo_device, and
assign that pointer to the default value currently used.

The driver can then optionally modify that pointer and the new value
can be used for each new vma created.
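
A sketch of the intended driver-side usage (the my_ names are
illustrative; vmwgfx does essentially this later in the series by keeping
a vm_operations_struct copy in its device-private structure):

static void my_driver_setup_vm_ops(struct my_private *priv)
{
        /* Start from the TTM defaults, then override selected ops. */
        priv->vm_ops = ttm_bo_vm_ops;
        priv->vm_ops.fault = my_driver_vm_fault;
        priv->vm_ops.page_mkwrite = my_driver_vm_mkwrite;
        priv->vm_ops.pfn_mkwrite = my_driver_vm_mkwrite;

        /* Must be set up before the first mmap() call on the device. */
        priv->bdev.vm_ops = &priv->vm_ops;
}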

Cc: "Christian König" 

Signed-off-by: Thomas Hellstrom 
Reviewed-by: Christian König 
---
 drivers/gpu/drm/ttm/ttm_bo.c| 1 +
 drivers/gpu/drm/ttm/ttm_bo_vm.c | 6 +++---
 include/drm/ttm/ttm_bo_driver.h | 6 ++
 3 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index c7de667d482a..6953dd264172 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -1739,6 +1739,7 @@ int ttm_bo_device_init(struct ttm_bo_device *bdev,
mutex_lock(&ttm_global_mutex);
list_add_tail(&bdev->device_list, &glob->device_list);
mutex_unlock(&ttm_global_mutex);
+   bdev->vm_ops = &ttm_bo_vm_ops;
 
return 0;
 out_no_sys:
diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
index 6dacff49c1cc..196e13a0adad 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -395,7 +395,7 @@ static int ttm_bo_vm_access(struct vm_area_struct *vma, 
unsigned long addr,
return ret;
 }
 
-static const struct vm_operations_struct ttm_bo_vm_ops = {
+const struct vm_operations_struct ttm_bo_vm_ops = {
.fault = ttm_bo_vm_fault,
.open = ttm_bo_vm_open,
.close = ttm_bo_vm_close,
@@ -448,7 +448,7 @@ int ttm_bo_mmap(struct file *filp, struct vm_area_struct 
*vma,
if (unlikely(ret != 0))
goto out_unref;
 
-   vma->vm_ops = &ttm_bo_vm_ops;
+   vma->vm_ops = bdev->vm_ops;
 
/*
 * Note: We're transferring the bo reference to
@@ -480,7 +480,7 @@ int ttm_fbdev_mmap(struct vm_area_struct *vma, struct 
ttm_buffer_object *bo)
 
ttm_bo_get(bo);
 
-   vma->vm_ops = &ttm_bo_vm_ops;
+   vma->vm_ops = bo->bdev->vm_ops;
vma->vm_private_data = bo;
vma->vm_flags |= VM_MIXEDMAP;
vma->vm_flags |= VM_IO | VM_DONTEXPAND;
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index c9b8ba492f24..a2d810a2504d 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -442,6 +442,9 @@ extern struct ttm_bo_global {
  * @driver: Pointer to a struct ttm_bo_driver struct setup by the driver.
  * @man: An array of mem_type_managers.
  * @vma_manager: Address space manager
+ * @vm_ops: Pointer to the struct vm_operations_struct used for this
+ * device's VM operations. The driver may override this before the first
+ * mmap() call.
  * lru_lock: Spinlock that protects the buffer+device lru lists and
  * ddestroy lists.
  * @dev_mapping: A pointer to the struct address_space representing the
@@ -460,6 +463,7 @@ struct ttm_bo_device {
struct ttm_bo_global *glob;
struct ttm_bo_driver *driver;
struct ttm_mem_type_manager man[TTM_NUM_MEM_TYPES];
+   const struct vm_operations_struct *vm_ops;
 
/*
 * Protected by internal locks.
@@ -488,6 +492,8 @@ struct ttm_bo_device {
bool no_retry;
 };
 
+extern const struct vm_operations_struct ttm_bo_vm_ops;
+
 /**
  * struct ttm_lru_bulk_move_pos
  *
-- 
2.20.1


Re: [PATCH v5 3/9] mm: Add write-protect and clean utilities for address space ranges

2019-06-12 Thread VMware

On 6/12/19 1:23 PM, Christoph Hellwig wrote:

On Wed, Jun 12, 2019 at 08:42:37AM +0200, Thomas Hellström (VMware) wrote:

From: Thomas Hellstrom 

Add two utilities to a) write-protect and b) clean all ptes pointing into
a range of an address space.
The utilities are intended to aid in tracking dirty pages (either
driver-allocated system memory or pci device memory).
The write-protect utility should be used in conjunction with
page_mkwrite() and pfn_mkwrite() to trigger write page-faults on page
accesses. Typically one would want to use this on sparse accesses into
large memory regions. The clean utility should be used to utilize
hardware dirtying functionality and avoid the overhead of page-faults,
typically on large accesses into small memory regions.

Please use EXPORT_SYMBOL_GPL, just like for apply_to_page_range and
friends.


Sounds reasonable, since this builds on already EXPORT_SYMBOL_GPL'd
functionality. I'll respin.



   Also in general new core functionality like this should go
along with the actual user, we don't need to repeat the hmm disaster.


I see in your later message that you noticed the other patches. There's
also user-space functionality in mesa that exercises this.


/Thomas



Re: [PATCH v5 2/9] mm: Add an apply_to_pfn_range interface

2019-06-12 Thread VMware

On 6/12/19 2:16 PM, Christoph Hellwig wrote:

On Wed, Jun 12, 2019 at 08:42:36AM +0200, Thomas Hellström (VMware) wrote:

From: Thomas Hellstrom 

This is basically apply_to_page_range with added functionality:
Allocating missing parts of the page table becomes optional, which
means that the function can be guaranteed not to error if allocation
is disabled. Also passing of the closure struct and callback function
becomes different and more in line with how things are done elsewhere.

Finally we keep apply_to_page_range as a wrapper around apply_to_pfn_range

The reason for not using the page-walk code is that we want to perform
the page-walk on vmas pointing to an address space without requiring the
mmap_sem to be held rather than on vmas belonging to a process with the
mmap_sem held.

Notable changes since RFC:
Don't export apply_to_pfn range.

Cc: Andrew Morton 
Cc: Matthew Wilcox 
Cc: Will Deacon 
Cc: Peter Zijlstra 
Cc: Rik van Riel 
Cc: Minchan Kim 
Cc: Michal Hocko 
Cc: Huang Ying 
Cc: Souptick Joarder 
Cc: "Jérôme Glisse" 
Cc: linux...@kvack.org
Cc: linux-ker...@vger.kernel.org

Signed-off-by: Thomas Hellstrom 
Reviewed-by: Ralph Campbell  #v1
---
  include/linux/mm.h |  10 
  mm/memory.c| 135 ++---
  2 files changed, 113 insertions(+), 32 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 0e8834ac32b7..3d06ce2a64af 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2675,6 +2675,16 @@ typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, 
unsigned long addr,
  extern int apply_to_page_range(struct mm_struct *mm, unsigned long address,
   unsigned long size, pte_fn_t fn, void *data);
  
+struct pfn_range_apply;

+typedef int (*pter_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr,
+struct pfn_range_apply *closure);
+struct pfn_range_apply {
+   struct mm_struct *mm;
+   pter_fn_t ptefn;
+   unsigned int alloc;
+};
+extern int apply_to_pfn_range(struct pfn_range_apply *closure,
+ unsigned long address, unsigned long size);
  
  #ifdef CONFIG_PAGE_POISONING

  extern bool page_poisoning_enabled(void);
diff --git a/mm/memory.c b/mm/memory.c
index 168f546af1ad..462aa47f8878 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2032,18 +2032,17 @@ int vm_iomap_memory(struct vm_area_struct *vma, 
phys_addr_t start, unsigned long
  }
  EXPORT_SYMBOL(vm_iomap_memory);
  
-static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd,

-unsigned long addr, unsigned long end,
-pte_fn_t fn, void *data)
+static int apply_to_pte_range(struct pfn_range_apply *closure, pmd_t *pmd,
+ unsigned long addr, unsigned long end)
  {
pte_t *pte;
int err;
pgtable_t token;
spinlock_t *uninitialized_var(ptl);
  
-	pte = (mm == &init_mm) ?

+   pte = (closure->mm == &init_mm) ?
pte_alloc_kernel(pmd, addr) :
-   pte_alloc_map_lock(mm, pmd, addr, &ptl);
+   pte_alloc_map_lock(closure->mm, pmd, addr, &ptl);
if (!pte)
return -ENOMEM;
  
@@ -2054,86 +2053,109 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd,

token = pmd_pgtable(*pmd);
  
  	do {

-   err = fn(pte++, token, addr, data);
+   err = closure->ptefn(pte++, token, addr, closure);
if (err)
break;
} while (addr += PAGE_SIZE, addr != end);
  
  	arch_leave_lazy_mmu_mode();
  
-	if (mm != &init_mm)

+   if (closure->mm != &init_mm)
pte_unmap_unlock(pte-1, ptl);
return err;
  }
  
-static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud,

-unsigned long addr, unsigned long end,
-pte_fn_t fn, void *data)
+static int apply_to_pmd_range(struct pfn_range_apply *closure, pud_t *pud,
+ unsigned long addr, unsigned long end)
  {
pmd_t *pmd;
unsigned long next;
-   int err;
+   int err = 0;
  
  	BUG_ON(pud_huge(*pud));
  
-	pmd = pmd_alloc(mm, pud, addr);

+   pmd = pmd_alloc(closure->mm, pud, addr);
if (!pmd)
return -ENOMEM;
+
do {
next = pmd_addr_end(addr, end);
-   err = apply_to_pte_range(mm, pmd, addr, next, fn, data);
+   if (!closure->alloc && pmd_none_or_clear_bad(pmd))
+   continue;
+   err = apply_to_pte_range(closure, pmd, addr, next);
if (err)
break;
} while (pmd++, addr = next, addr != end);
return err;
  }
  
-static int apply_to_pud_range(struct mm_struct *mm, p4d_t *p4d,

-

[PATCH v6 6/9] drm/vmwgfx: Implement an infrastructure for write-coherent resources

2019-06-12 Thread VMware
 Emulate coherency by tracking vm accesses.
  * @backup: The backup buffer if any. Protected by resource reserved.
  * @backup_offset: Offset into the backup buffer if any. Protected by resource
  * reserved. Note that only a few resource types can have a @backup_offset
@@ -151,14 +154,16 @@ struct vmw_res_func;
  * @hw_destroy: Callback to destroy the resource on the device, as part of
  * resource destruction.
  */
+struct vmw_resource_dirty;
 struct vmw_resource {
struct kref kref;
struct vmw_private *dev_priv;
int id;
u32 used_prio;
unsigned long backup_size;
-   bool res_dirty;
-   bool backup_dirty;
+   u32 res_dirty : 1;
+   u32 backup_dirty : 1;
+   u32 coherent : 1;
struct vmw_buffer_object *backup;
unsigned long backup_offset;
unsigned long pin_count;
@@ -166,6 +171,7 @@ struct vmw_resource {
struct list_head lru_head;
struct list_head mob_head;
struct list_head binding_head;
+   struct vmw_resource_dirty *dirty;
void (*res_free) (struct vmw_resource *res);
void (*hw_destroy) (struct vmw_resource *res);
 };
@@ -606,6 +612,9 @@ struct vmw_private {
 
/* Validation memory reservation */
struct vmw_validation_mem vvm;
+
+   /* VM operations */
+   struct vm_operations_struct vm_ops;
 };
 
 static inline struct vmw_surface *vmw_res_to_srf(struct vmw_resource *res)
@@ -722,6 +731,8 @@ extern void vmw_resource_evict_all(struct vmw_private *dev_priv);
 extern void vmw_resource_unbind_list(struct vmw_buffer_object *vbo);
 void vmw_resource_mob_attach(struct vmw_resource *res);
 void vmw_resource_mob_detach(struct vmw_resource *res);
+void vmw_resource_dirty_update(struct vmw_resource *res, pgoff_t start,
+  pgoff_t end);
 
 /**
  * vmw_resource_mob_attached - Whether a resource currently has a mob attached
@@ -1410,6 +1421,15 @@ int vmw_host_log(const char *log);
 #define VMW_DEBUG_USER(fmt, ...)  \
DRM_DEBUG_DRIVER(fmt, ##__VA_ARGS__)
 
+/* Resource dirtying - vmwgfx_page_dirty.c */
+void vmw_bo_dirty_scan(struct vmw_buffer_object *vbo);
+int vmw_bo_dirty_add(struct vmw_buffer_object *vbo);
+void vmw_bo_dirty_transfer_to_res(struct vmw_resource *res);
+void vmw_bo_dirty_clear_res(struct vmw_resource *res);
+void vmw_bo_dirty_release(struct vmw_buffer_object *vbo);
+vm_fault_t vmw_bo_vm_fault(struct vm_fault *vmf);
+vm_fault_t vmw_bo_vm_mkwrite(struct vm_fault *vmf);
+
 /**
  * Inline helper functions
  */
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
index 33533d126277..319c1ca35663 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
@@ -2560,7 +2560,6 @@ static int vmw_cmd_dx_check_subresource(struct vmw_private *dev_priv,
 offsetof(typeof(*cmd), sid));
 
cmd = container_of(header, typeof(*cmd), header);
-
return vmw_cmd_res_check(dev_priv, sw_context, vmw_res_surface,
 VMW_RES_DIRTY_NONE, user_surface_converter,
 &cmd->sid, NULL);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c b/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c
new file mode 100644
index ..8d154f90bdc0
--- /dev/null
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c
@@ -0,0 +1,409 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/******
+ *
+ * Copyright 2019 VMware, Inc., Palo Alto, CA., USA
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **/
+#include "vmwgfx_drv.h"
+
+/*
+ * Different methods for tracking dirty:
+ * VMW_BO_DIRTY_PAG

[PATCH v6 8/9] drm/vmwgfx: Implement an infrastructure for read-coherent resources

2019-06-12 Thread VMware
From: Thomas Hellstrom 

Similar to write-coherent resources, make sure that from the user-space
point of view, GPU-rendered content is automatically available for
reading by the CPU.
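
As a sketch, the fault path can use the new vmw_resources_clean()
added below roughly like this (fragment; the prefault constant and
the error handling are illustrative, not part of this patch):

	pgoff_t num_prefault = TTM_BO_VM_NUM_PREFAULT;
	int err;

	/* Flush GPU-dirty content overlapping the faulting range back
	 * to a CPU-readable state before inserting ptes; the call may
	 * shrink the prefault span to stay within a single resource: */
	err = vmw_resources_clean(vbo, page_offset,
				  page_offset + num_prefault, &num_prefault);
	if (err)
		return VM_FAULT_SIGBUS;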

Signed-off-by: Thomas Hellstrom 
Reviewed-by: Deepak Rawat 
---
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.h   |   7 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c|  73 -
 drivers/gpu/drm/vmwgfx/vmwgfx_resource.c  | 103 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_resource_priv.h |   2 +
 drivers/gpu/drm/vmwgfx/vmwgfx_validation.c|   3 +-
 5 files changed, 177 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index 9b347923196f..dae3a39bf402 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -689,7 +689,8 @@ extern void vmw_resource_unreference(struct vmw_resource **p_res);
 extern struct vmw_resource *vmw_resource_reference(struct vmw_resource *res);
 extern struct vmw_resource *
 vmw_resource_reference_unless_doomed(struct vmw_resource *res);
-extern int vmw_resource_validate(struct vmw_resource *res, bool intr);
+extern int vmw_resource_validate(struct vmw_resource *res, bool intr,
+bool dirtying);
 extern int vmw_resource_reserve(struct vmw_resource *res, bool interruptible,
bool no_backup);
 extern bool vmw_resource_needs_backup(const struct vmw_resource *res);
@@ -733,6 +734,8 @@ void vmw_resource_mob_attach(struct vmw_resource *res);
 void vmw_resource_mob_detach(struct vmw_resource *res);
 void vmw_resource_dirty_update(struct vmw_resource *res, pgoff_t start,
   pgoff_t end);
+int vmw_resources_clean(struct vmw_buffer_object *vbo, pgoff_t start,
+   pgoff_t end, pgoff_t *num_prefault);
 
 /**
  * vmw_resource_mob_attached - Whether a resource currently has a mob attached
@@ -1427,6 +1430,8 @@ int vmw_bo_dirty_add(struct vmw_buffer_object *vbo);
 void vmw_bo_dirty_transfer_to_res(struct vmw_resource *res);
 void vmw_bo_dirty_clear_res(struct vmw_resource *res);
 void vmw_bo_dirty_release(struct vmw_buffer_object *vbo);
+void vmw_bo_dirty_unmap(struct vmw_buffer_object *vbo,
+   pgoff_t start, pgoff_t end);
 vm_fault_t vmw_bo_vm_fault(struct vm_fault *vmf);
 vm_fault_t vmw_bo_vm_mkwrite(struct vm_fault *vmf);
 
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c b/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c
index 8d154f90bdc0..730c51e397dd 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c
@@ -153,7 +153,6 @@ static void vmw_bo_dirty_scan_mkwrite(struct vmw_buffer_object *vbo)
}
 }
 
-
 /**
  * vmw_bo_dirty_scan - Scan for dirty pages and add them to the dirty
  * tracking structure
@@ -171,6 +170,51 @@ void vmw_bo_dirty_scan(struct vmw_buffer_object *vbo)
vmw_bo_dirty_scan_mkwrite(vbo);
 }
 
+/**
+ * vmw_bo_dirty_pre_unmap - write-protect and pick up dirty pages before
+ * an unmap_mapping_range operation.
+ * @vbo: The buffer object.
+ * @start: First page of the range within the buffer object.
+ * @end: Last page of the range within the buffer object + 1.
+ *
+ * If we're using the _PAGETABLE scan method, we may leak dirty pages
+ * when calling unmap_mapping_range(). This function makes sure we pick
+ * up all dirty pages.
+ */
+static void vmw_bo_dirty_pre_unmap(struct vmw_buffer_object *vbo,
+  pgoff_t start, pgoff_t end)
+{
+   struct vmw_bo_dirty *dirty = vbo->dirty;
+   unsigned long offset = drm_vma_node_start(&vbo->base.vma_node);
+   struct address_space *mapping = vbo->base.bdev->dev_mapping;
+
+   if (dirty->method != VMW_BO_DIRTY_PAGETABLE || start >= end)
+   return;
+
+   apply_as_wrprotect(mapping, start + offset, end - start);
+   apply_as_clean(mapping, start + offset, end - start, offset,
+  &dirty->bitmap[0], &dirty->start, &dirty->end);
+}
+
+/**
+ * vmw_bo_dirty_unmap - Clear all ptes pointing to a range within a bo
+ * @vbo: The buffer object.
+ * @start: First page of the range within the buffer object.
+ * @end: Last page of the range within the buffer object + 1.
+ *
+ * This is similar to ttm_bo_unmap_virtual_locked() except it takes a subrange.
+ */
+void vmw_bo_dirty_unmap(struct vmw_buffer_object *vbo,
+   pgoff_t start, pgoff_t end)
+{
+   unsigned long offset = drm_vma_node_start(&vbo->base.vma_node);
+   struct address_space *mapping = vbo->base.bdev->dev_mapping;
+
+   vmw_bo_dirty_pre_unmap(vbo, start, end);
+   unmap_shared_mapping_range(mapping, (offset + start) << PAGE_SHIFT,
+  (loff_t) (end - start) << PAGE_SHIFT);
+}
+
 /**
  * vmw_bo_dirty_add - Add a dirty-tracking user to a buffer object
  * @vbo: The buffer object
@@ -389,21 +433,40 @@ vm_fault_t vmw_bo_vm_fault(stru

[PATCH v6 5/9] drm/ttm: TTM fault handler helpers

2019-06-12 Thread VMware
From: Thomas Hellstrom 

With the vmwgfx dirty tracking, the default TTM fault handler is not
completely sufficient (vmwgfx needs to modify the vma->vm_flags member,
and also needs to restrict the number of prefaults).

We also want to replicate the new ttm_bo_vm_reserve() functionality.

So start turning the TTM vm code into helpers: ttm_bo_vm_fault_reserved()
and ttm_bo_vm_reserve(), and provide a default TTM fault handler for other
drivers to use.

Cc: "Christian König" 

Signed-off-by: Thomas Hellstrom 
Reviewed-by: "Christian König"  #v1
---
 drivers/gpu/drm/ttm/ttm_bo_vm.c | 175 +++-
 include/drm/ttm/ttm_bo_api.h|  10 ++
 2 files changed, 113 insertions(+), 72 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
index 196e13a0adad..2d9862fcf6fd 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -42,8 +42,6 @@
 #include 
 #include 
 
-#define TTM_BO_VM_NUM_PREFAULT 16
-
 static vm_fault_t ttm_bo_vm_fault_idle(struct ttm_buffer_object *bo,
struct vm_fault *vmf)
 {
@@ -106,31 +104,30 @@ static unsigned long ttm_bo_io_mem_pfn(struct ttm_buffer_object *bo,
+ page_offset;
 }
 
-static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
+/**
+ * ttm_bo_vm_reserve - Reserve a buffer object in a retryable vm callback
+ * @bo: The buffer object
+ * @vmf: The fault structure handed to the callback
+ *
+ * vm callbacks like fault() and *_mkwrite() allow for the mm_sem to be dropped
+ * during long waits, and after the wait the callback will be restarted. This
+ * is to allow other threads using the same virtual memory space concurrent
+ * access to map(), unmap() completely unrelated buffer objects. TTM buffer
+ * object reservations sometimes wait for GPU and should therefore be
+ * considered long waits. This function reserves the buffer object interruptibly
+ * taking this into account. Starvation is avoided by the vm system not
+ * allowing too many repeated restarts.
+ * This function is intended to be used in customized fault() and _mkwrite()
+ * handlers.
+ *
+ * Return:
+ *0 on success and the bo was reserved.
+ *VM_FAULT_RETRY if blocking wait.
+ *VM_FAULT_NOPAGE if blocking wait and retrying was not allowed.
+ */
+vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo,
+struct vm_fault *vmf)
 {
-   struct vm_area_struct *vma = vmf->vma;
-   struct ttm_buffer_object *bo = (struct ttm_buffer_object *)
-   vma->vm_private_data;
-   struct ttm_bo_device *bdev = bo->bdev;
-   unsigned long page_offset;
-   unsigned long page_last;
-   unsigned long pfn;
-   struct ttm_tt *ttm = NULL;
-   struct page *page;
-   int err;
-   int i;
-   vm_fault_t ret = VM_FAULT_NOPAGE;
-   unsigned long address = vmf->address;
-   struct ttm_mem_type_manager *man =
-   &bdev->man[bo->mem.mem_type];
-   struct vm_area_struct cvma;
-
-   /*
-* Work around locking order reversal in fault / nopfn
-* between mmap_sem and bo_reserve: Perform a trylock operation
-* for reserve, and if it fails, retry the fault after waiting
-* for the buffer to become unreserved.
-*/
if (unlikely(!reservation_object_trylock(bo->resv))) {
if (vmf->flags & FAULT_FLAG_ALLOW_RETRY) {
if (!(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) {
@@ -151,14 +148,55 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
return VM_FAULT_NOPAGE;
}
 
+   return 0;
+}
+EXPORT_SYMBOL(ttm_bo_vm_reserve);
+
+/**
+ * ttm_bo_vm_fault_reserved - TTM fault helper
+ * @vmf: The struct vm_fault given as argument to the fault callback
+ * @prot: The page protection to be used for this memory area.
+ * @num_prefault: Maximum number of prefault pages. The caller may want to
+ * specify this based on madvice settings and the size of the GPU object
+ * backed by the memory.
+ *
+ * This function inserts one or more page table entries pointing to the
+ * memory backing the buffer object, and then returns a return code
+ * instructing the caller to retry the page access.
+ *
+ * Return:
+ *   VM_FAULT_NOPAGE on success or pending signal
+ *   VM_FAULT_SIGBUS on unspecified error
+ *   VM_FAULT_OOM on out-of-memory
+ *   VM_FAULT_RETRY if retryable wait
+ */
+vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf,
+   pgprot_t prot,
+   pgoff_t num_prefault)
+{
+   struct vm_area_struct *vma = vmf->vma;
+   struct vm_area_struct cvma = *vma;
+   struct ttm_buffer_object *bo = (struct ttm_buffer_object *)
+   vma->vm_private_data;
+   struct ttm_bo_device *bdev = bo->bdev;
+   unsigned long page_offset;
+   unsigned long page_last;
+   unsigned long pfn;
+   struct ttm_tt *ttm = NULL;
+   struct p

[PATCH v6 0/9] Emulated coherent graphics memory

2019-06-12 Thread VMware
Planning to merge this through the drm/vmwgfx tree soon, so if there
are any objections, please speak up.

Graphics APIs like OpenGL 4.4 and Vulkan require the graphics driver
to provide coherent graphics memory, meaning that the GPU sees any
content written to the coherent memory on the next GPU operation that
touches that memory, and the CPU sees any content written by the GPU
to that memory immediately after any fence object trailing the GPU
operation has signaled.

Paravirtual drivers that otherwise require explicit synchronization
need to do this by hooking up dirty tracking to pagefault handlers
and buffer object validation. This is a first attempt to do that for
the vmwgfx driver.

The mm patches have been out for RFC. I think I have addressed all the
feedback I got, except a possible softdirty breakage. But although the
dirty-tracking and softdirty may write-protect PTEs both care about,
that shouldn't really cause any operation interference. In particular
since we use the hardware dirty PTE bits and softdirty uses other PTE bits.

For the TTM changes they are hopefully in line with the long-term
strategy of making helpers out of what's left of TTM.

The code has been tested and exercised by a tailored version of mesa
where we disable all explicit synchronization and assume graphics memory
is coherent. The performance loss varies of course; a typical number is
around 5%.

Changes v1-v2:
- Addressed a number of typos and formatting issues.
- Added a usage warning for apply_to_pfn_range() and apply_to_page_range()
- Re-evaluated the decision to use apply_to_pfn_range() rather than
  modifying the pagewalk.c. It still looks like generically handling the
  transparent huge page cases requires the mmap_sem to be held at least
  in read mode, so sticking with apply_to_pfn_range() for now.
- The TTM page-fault helper vma copy argument was scratched in favour of
  a pageprot_t argument.
Changes v3:
- Adapted to upstream API changes.
Changes v4:
- Adapted to upstream mmu_notifier changes. (Jerome?)
- Fixed a couple of warnings on 32-bit x86
- Fixed image offset computation on multisample images.
Changes v5:
- Updated usage warning in patch 3/9 after review comments from Nadav Amit.
Changes v6:
- Updated exports of new functionality in patch 3/9 to EXPORT_SYMBOL_GPL
  after review comments from Christoph Hellwig.
  
Cc: Andrew Morton 
Cc: Matthew Wilcox 
Cc: Will Deacon 
Cc: Peter Zijlstra 
Cc: Rik van Riel 
Cc: Minchan Kim 
Cc: Michal Hocko 
Cc: Huang Ying 
Cc: Souptick Joarder 
Cc: "Jérôme Glisse" 
Cc: "Christian König" 
Cc: linux...@kvack.org

[PATCH v6 9/9] drm/vmwgfx: Add surface dirty-tracking callbacks

2019-06-12 Thread VMware
From: Thomas Hellstrom 

Add the callbacks necessary to implement emulated coherent memory for
surfaces. Add a flag to the gb_surface_create ioctl to indicate that
surface memory should be coherent.
Also bump the drm minor version to signal the availability of coherent
surfaces.

Signed-off-by: Thomas Hellstrom 
Reviewed-by: Deepak Rawat 
---
 .../device_include/svga3d_surfacedefs.h   | 233 ++-
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.h   |   4 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_surface.c   | 395 +-
 include/uapi/drm/vmwgfx_drm.h |   4 +-
 4 files changed, 629 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/vmwgfx/device_include/svga3d_surfacedefs.h b/drivers/gpu/drm/vmwgfx/device_include/svga3d_surfacedefs.h
index f2bfd3d80598..61414f105c67 100644
--- a/drivers/gpu/drm/vmwgfx/device_include/svga3d_surfacedefs.h
+++ b/drivers/gpu/drm/vmwgfx/device_include/svga3d_surfacedefs.h
@@ -1280,7 +1280,6 @@ svga3dsurface_get_pixel_offset(SVGA3dSurfaceFormat format,
return offset;
 }
 
-
 static inline u32
 svga3dsurface_get_image_offset(SVGA3dSurfaceFormat format,
   surf_size_struct baseLevelSize,
@@ -1375,4 +1374,236 @@ svga3dsurface_is_screen_target_format(SVGA3dSurfaceFormat format)
return svga3dsurface_is_dx_screen_target_format(format);
 }
 
+/**
+ * struct svga3dsurface_mip - Mipmap level information
+ * @bytes: Bytes required in the backing store of this mipmap level.
+ * @img_stride: Byte stride per image.
+ * @row_stride: Byte stride per block row.
+ * @size: The size of the mipmap.
+ */
+struct svga3dsurface_mip {
+   size_t bytes;
+   size_t img_stride;
+   size_t row_stride;
+   struct drm_vmw_size size;
+
+};
+
+/**
+ * struct svga3dsurface_cache - Cached surface information
+ * @desc: Pointer to the surface descriptor
+ * @mip: Array of mipmap level information. Valid size is @num_mip_levels.
+ * @mip_chain_bytes: Bytes required in the backing store for the whole chain
+ * of mip levels.
+ * @sheet_bytes: Bytes required in the backing store for a sheet
+ * representing a single sample.
+ * @num_mip_levels: Valid size of the @mip array. Number of mipmap levels in
+ * a chain.
+ * @num_layers: Number of slices in an array texture or number of faces in
+ * a cubemap texture.
+ */
+struct svga3dsurface_cache {
+   const struct svga3d_surface_desc *desc;
+   struct svga3dsurface_mip mip[DRM_VMW_MAX_MIP_LEVELS];
+   size_t mip_chain_bytes;
+   size_t sheet_bytes;
+   u32 num_mip_levels;
+   u32 num_layers;
+};
+
+/**
+ * struct svga3dsurface_loc - Surface location
+ * @sub_resource: Surface subresource. Defined as layer * num_mip_levels +
+ * mip_level.
+ * @x: X coordinate.
+ * @y: Y coordinate.
+ * @z: Z coordinate.
+ */
+struct svga3dsurface_loc {
+   u32 sub_resource;
+   u32 x, y, z;
+};
+
+/**
+ * svga3dsurface_subres - Compute the subresource from layer and mipmap.
+ * @cache: Surface layout data.
+ * @mip_level: The mipmap level.
+ * @layer: The surface layer (face or array slice).
+ *
+ * Return: The subresource.
+ */
+static inline u32 svga3dsurface_subres(const struct svga3dsurface_cache *cache,
+  u32 mip_level, u32 layer)
+{
+   return cache->num_mip_levels * layer + mip_level;
+}
+
+/**
+ * svga3dsurface_setup_cache - Build a surface cache entry
+ * @size: The surface base level dimensions.
+ * @format: The surface format.
+ * @num_mip_levels: Number of mipmap levels.
+ * @num_layers: Number of layers.
+ * @num_samples: Number of samples per pixel.
+ * @cache: Pointer to a struct svga3dsurface_cache object to be filled in.
+ *
+ * Return: Zero on success, -EINVAL on invalid surface layout.
+ */
+static inline int svga3dsurface_setup_cache(const struct drm_vmw_size *size,
+   SVGA3dSurfaceFormat format,
+   u32 num_mip_levels,
+   u32 num_layers,
+   u32 num_samples,
+   struct svga3dsurface_cache *cache)
+{
+   const struct svga3d_surface_desc *desc;
+   u32 i;
+
+   memset(cache, 0, sizeof(*cache));
+   cache->desc = desc = svga3dsurface_get_desc(format);
+   cache->num_mip_levels = num_mip_levels;
+   cache->num_layers = num_layers;
+   for (i = 0; i < cache->num_mip_levels; i++) {
+   struct svga3dsurface_mip *mip = &cache->mip[i];
+
+   mip->size = svga3dsurface_get_mip_size(*size, i);
+   mip->bytes = svga3dsurface_get_image_buffer_size
+   (desc, &mip->size, 0);
+   mip->row_stride =
+   __KERNEL_DIV_ROUND_UP(mip->size.width,
+ desc->block_size.width) *
+   desc->bytes_per_block * num_samples;
+   if (!mip->row_stride)
+   goto invalid_di

[PATCH v6 2/9] mm: Add an apply_to_pfn_range interface

2019-06-12 Thread VMware
From: Thomas Hellstrom 

This is basically apply_to_page_range with added functionality:
Allocating missing parts of the page table becomes optional, which
means that the function can be guaranteed not to error if allocation
is disabled. Also, the closure struct and the callback function are now
passed differently, more in line with how things are done elsewhere.

Finally, we keep apply_to_page_range() as a wrapper around apply_to_pfn_range().

The reason for not using the page-walk code is that we want to perform
the page-walk on vmas pointing to an address space without requiring the
mmap_sem to be held rather than on vmas belonging to a process with the
mmap_sem held.

Notable changes since RFC:
Don't export apply_to_pfn_range().
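
A minimal sketch of a caller, assuming a leaf callback that counts
populated ptes (names are illustrative; the point is how the closure
struct is embedded and recovered with container_of()):

struct count_closure {
	struct pfn_range_apply base;	/* embedded closure */
	unsigned long count;
};

static int count_pte(pte_t *pte, pgtable_t token, unsigned long addr,
		     struct pfn_range_apply *closure)
{
	struct count_closure *cc =
		container_of(closure, struct count_closure, base);

	if (!pte_none(*pte))
		cc->count++;
	return 0;
}

static unsigned long count_present_ptes(struct mm_struct *mm,
					unsigned long addr,
					unsigned long size)
{
	struct count_closure cc = {
		.base = { .mm = mm, .ptefn = count_pte, .alloc = 0 },
	};

	/* With .alloc == 0, missing page tables are skipped rather
	 * than allocated, so sparse ranges cannot fail with -ENOMEM. */
	return apply_to_pfn_range(&cc.base, addr, size) ? 0 : cc.count;
}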

Cc: Andrew Morton 
Cc: Matthew Wilcox 
Cc: Will Deacon 
Cc: Peter Zijlstra 
Cc: Rik van Riel 
Cc: Minchan Kim 
Cc: Michal Hocko 
Cc: Huang Ying 
Cc: Souptick Joarder 
Cc: "Jérôme Glisse" 
Cc: linux...@kvack.org
Cc: linux-ker...@vger.kernel.org

Signed-off-by: Thomas Hellstrom 
Reviewed-by: Ralph Campbell  #v1
---
 include/linux/mm.h |  10 
 mm/memory.c| 135 ++---
 2 files changed, 113 insertions(+), 32 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 0e8834ac32b7..3d06ce2a64af 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2675,6 +2675,16 @@ typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr,
 extern int apply_to_page_range(struct mm_struct *mm, unsigned long address,
   unsigned long size, pte_fn_t fn, void *data);
 
+struct pfn_range_apply;
+typedef int (*pter_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr,
+struct pfn_range_apply *closure);
+struct pfn_range_apply {
+   struct mm_struct *mm;
+   pter_fn_t ptefn;
+   unsigned int alloc;
+};
+extern int apply_to_pfn_range(struct pfn_range_apply *closure,
+ unsigned long address, unsigned long size);
 
 #ifdef CONFIG_PAGE_POISONING
 extern bool page_poisoning_enabled(void);
diff --git a/mm/memory.c b/mm/memory.c
index 168f546af1ad..462aa47f8878 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2032,18 +2032,17 @@ int vm_iomap_memory(struct vm_area_struct *vma, phys_addr_t start, unsigned long
 }
 EXPORT_SYMBOL(vm_iomap_memory);
 
-static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd,
-unsigned long addr, unsigned long end,
-pte_fn_t fn, void *data)
+static int apply_to_pte_range(struct pfn_range_apply *closure, pmd_t *pmd,
+ unsigned long addr, unsigned long end)
 {
pte_t *pte;
int err;
pgtable_t token;
spinlock_t *uninitialized_var(ptl);
 
-   pte = (mm == &init_mm) ?
+   pte = (closure->mm == &init_mm) ?
pte_alloc_kernel(pmd, addr) :
-   pte_alloc_map_lock(mm, pmd, addr, &ptl);
+   pte_alloc_map_lock(closure->mm, pmd, addr, &ptl);
if (!pte)
return -ENOMEM;
 
@@ -2054,86 +2053,109 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd,
token = pmd_pgtable(*pmd);
 
do {
-   err = fn(pte++, token, addr, data);
+   err = closure->ptefn(pte++, token, addr, closure);
if (err)
break;
} while (addr += PAGE_SIZE, addr != end);
 
arch_leave_lazy_mmu_mode();
 
-   if (mm != &init_mm)
+   if (closure->mm != &init_mm)
pte_unmap_unlock(pte-1, ptl);
return err;
 }
 
-static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud,
-unsigned long addr, unsigned long end,
-pte_fn_t fn, void *data)
+static int apply_to_pmd_range(struct pfn_range_apply *closure, pud_t *pud,
+ unsigned long addr, unsigned long end)
 {
pmd_t *pmd;
unsigned long next;
-   int err;
+   int err = 0;
 
BUG_ON(pud_huge(*pud));
 
-   pmd = pmd_alloc(mm, pud, addr);
+   pmd = pmd_alloc(closure->mm, pud, addr);
if (!pmd)
return -ENOMEM;
+
do {
next = pmd_addr_end(addr, end);
-   err = apply_to_pte_range(mm, pmd, addr, next, fn, data);
+   if (!closure->alloc && pmd_none_or_clear_bad(pmd))
+   continue;
+   err = apply_to_pte_range(closure, pmd, addr, next);
if (err)
break;
} while (pmd++, addr = next, addr != end);
return err;
 }
 
-static int apply_to_pud_range(struct mm_struct *mm, p4d_t *p4d,
-unsigned long addr, unsigned long end,
-pte_fn_t fn, void *data)
+static int apply_to_pud_range(struct pfn_range_apply *closure, p4d_t *p4d,
+  

[PATCH v6 3/9] mm: Add write-protect and clean utilities for address space ranges

2019-06-12 Thread VMware
From: Thomas Hellstrom 

Add two utilities to a) write-protect and b) clean all ptes pointing into
a range of an address space.
The utilities are intended to aid in tracking dirty pages (either
driver-allocated system memory or pci device memory).
The write-protect utility should be used in conjunction with
page_mkwrite() and pfn_mkwrite() to trigger write page-faults on page
accesses. Typically one would want to use this on sparse accesses into
large memory regions. The clean utility should be used to utilize
hardware dirtying functionality and avoid the overhead of page-faults,
typically on large accesses into small memory regions.
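
A sketch of the intended cycle from a driver's point of view (names
and the split between the two calls are illustrative):

static void my_dirty_cycle(struct address_space *mapping,
			   pgoff_t first_index, pgoff_t nr,
			   unsigned long *bitmap)
{
	pgoff_t start = nr, end = 0;

	/* Arm write page-faults over a sparsely accessed region: */
	apply_as_wrprotect(mapping, first_index, nr);

	/* ... later, on a heavily written region, harvest the hardware
	 * dirty bits and write-clean the ptes in one pass; on return,
	 * bits [start, end) of bitmap flag the CPU-dirtied pages: */
	apply_as_clean(mapping, first_index, nr, first_index,
		       bitmap, &start, &end);
}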

The added file "as_dirty_helpers.c" is initially listed as maintained by
VMware under our DRM driver. If somebody would like it elsewhere,
that's of course no problem.

Notable changes since RFC:
- Added comments to help avoid the usage of these functions for VMAs
  they're not intended for. We also do advisory checks on the vm_flags and
  warn on illegal usage.
- Perform the pte modifications the same way softdirty does.
- Add mmu_notifier range invalidation calls.
- Add a config option so that this code is not unconditionally included.
- Tell the mmu_gather code about pending tlb flushes.

Cc: Andrew Morton 
Cc: Matthew Wilcox 
Cc: Will Deacon 
Cc: Peter Zijlstra 
Cc: Rik van Riel 
Cc: Minchan Kim 
Cc: Michal Hocko 
Cc: Huang Ying 
Cc: Souptick Joarder 
Cc: "Jérôme Glisse" 
Cc: linux...@kvack.org
Cc: linux-ker...@vger.kernel.org

Signed-off-by: Thomas Hellstrom 
Reviewed-by: Ralph Campbell  #v1
---
 MAINTAINERS   |   1 +
 include/linux/mm.h|   9 +-
 mm/Kconfig|   3 +
 mm/Makefile   |   1 +
 mm/as_dirty_helpers.c | 300 ++
 5 files changed, 313 insertions(+), 1 deletion(-)
 create mode 100644 mm/as_dirty_helpers.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 7a2f487ea49a..a55d4ef91b0b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5179,6 +5179,7 @@ T: git git://people.freedesktop.org/~thomash/linux
 S: Supported
 F: drivers/gpu/drm/vmwgfx/
 F: include/uapi/drm/vmwgfx_drm.h
+F: mm/as_dirty_helpers.c
 
 DRM DRIVERS
 M: David Airlie 
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 3d06ce2a64af..a0bc2a82917e 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2685,7 +2685,14 @@ struct pfn_range_apply {
 };
 extern int apply_to_pfn_range(struct pfn_range_apply *closure,
  unsigned long address, unsigned long size);
-
+unsigned long apply_as_wrprotect(struct address_space *mapping,
+pgoff_t first_index, pgoff_t nr);
+unsigned long apply_as_clean(struct address_space *mapping,
+pgoff_t first_index, pgoff_t nr,
+pgoff_t bitmap_pgoff,
+unsigned long *bitmap,
+pgoff_t *start,
+pgoff_t *end);
 #ifdef CONFIG_PAGE_POISONING
 extern bool page_poisoning_enabled(void);
 extern void kernel_poison_pages(struct page *page, int numpages, int enable);
diff --git a/mm/Kconfig b/mm/Kconfig
index f0c76ba47695..5006d0e6a5c7 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -765,4 +765,7 @@ config GUP_BENCHMARK
 config ARCH_HAS_PTE_SPECIAL
bool
 
+config AS_DIRTY_HELPERS
+bool
+
 endmenu
diff --git a/mm/Makefile b/mm/Makefile
index ac5e5ba78874..f5d412bbc2f7 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -104,3 +104,4 @@ obj-$(CONFIG_HARDENED_USERCOPY) += usercopy.o
 obj-$(CONFIG_PERCPU_STATS) += percpu-stats.o
 obj-$(CONFIG_HMM) += hmm.o
 obj-$(CONFIG_MEMFD_CREATE) += memfd.o
+obj-$(CONFIG_AS_DIRTY_HELPERS) += as_dirty_helpers.o
diff --git a/mm/as_dirty_helpers.c b/mm/as_dirty_helpers.c
new file mode 100644
index ..f600e31534fb
--- /dev/null
+++ b/mm/as_dirty_helpers.c
@@ -0,0 +1,300 @@
+// SPDX-License-Identifier: GPL-2.0
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+/**
+ * struct apply_as - Closure structure for apply_as_range
+ * @base: struct pfn_range_apply we derive from
+ * @start: Address of first modified pte
+ * @end: Address of last modified pte + 1
+ * @total: Total number of modified ptes
+ * @vma: Pointer to the struct vm_area_struct we're currently operating on
+ */
+struct apply_as {
+   struct pfn_range_apply base;
+   unsigned long start;
+   unsigned long end;
+   unsigned long total;
+   struct vm_area_struct *vma;
+};
+
+/**
+ * apply_pt_wrprotect - Leaf pte callback to write-protect a pte
+ * @pte: Pointer to the pte
+ * @token: Page table token, see apply_to_pfn_range()
+ * @addr: The virtual page address
+ * @closure: Pointer to a struct pfn_range_apply embedded in a
+ * struct apply_as
+ *
+ * The function write-protects a pte and records the range in
+ * virtual address space of touched ptes for efficient range TLB flushes.
+ *
+ * Return: Always zero.

[PATCH v6 1/9] mm: Allow the [page|pfn]_mkwrite callbacks to drop the mmap_sem

2019-06-12 Thread VMware
From: Thomas Hellstrom 

Driver fault callbacks are allowed to drop the mmap_sem when expecting
long hardware waits to avoid blocking other mm users. Allow the mkwrite
callbacks to do the same by returning early on VM_FAULT_RETRY.

In particular we want to be able to drop the mmap_sem when waiting for
a reservation object lock on a GPU buffer object. These locks may be
held while waiting for the GPU.
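
A sketch of the callback pattern this enables (the my_* names are
hypothetical; the real users are the TTM and vmwgfx handlers later in
the series):

static vm_fault_t my_pfn_mkwrite(struct vm_fault *vmf)
{
	struct my_bo *bo = vmf->vma->vm_private_data;

	if (!my_bo_trylock(bo)) {
		if ((vmf->flags & FAULT_FLAG_ALLOW_RETRY) &&
		    !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) {
			/* Drop the mmap_sem and wait out the hold;
			 * the core now returns early on this code: */
			up_read(&vmf->vma->vm_mm->mmap_sem);
			my_bo_wait_unreserved(bo);
			return VM_FAULT_RETRY;
		}
		return VM_FAULT_NOPAGE;
	}

	/* ... mark the bo dirty ... */
	my_bo_unlock(bo);
	return 0;
}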

Cc: Andrew Morton 
Cc: Matthew Wilcox 
Cc: Will Deacon 
Cc: Peter Zijlstra 
Cc: Rik van Riel 
Cc: Minchan Kim 
Cc: Michal Hocko 
Cc: Huang Ying 
Cc: Souptick Joarder 
Cc: "Jérôme Glisse" 
Cc: linux...@kvack.org
Cc: linux-ker...@vger.kernel.org

Signed-off-by: Thomas Hellstrom 
Reviewed-by: Ralph Campbell 
---
 mm/memory.c | 10 ++
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/mm/memory.c b/mm/memory.c
index ddf20bd0c317..168f546af1ad 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2238,7 +2238,7 @@ static vm_fault_t do_page_mkwrite(struct vm_fault *vmf)
ret = vmf->vma->vm_ops->page_mkwrite(vmf);
/* Restore original flags so that caller is not surprised */
vmf->flags = old_flags;
-   if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))
+   if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
return ret;
if (unlikely(!(ret & VM_FAULT_LOCKED))) {
lock_page(page);
@@ -2515,7 +2515,7 @@ static vm_fault_t wp_pfn_shared(struct vm_fault *vmf)
pte_unmap_unlock(vmf->pte, vmf->ptl);
vmf->flags |= FAULT_FLAG_MKWRITE;
ret = vma->vm_ops->pfn_mkwrite(vmf);
-   if (ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE))
+   if (ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))
return ret;
return finish_mkwrite_fault(vmf);
}
@@ -2536,7 +2536,8 @@ static vm_fault_t wp_page_shared(struct vm_fault *vmf)
pte_unmap_unlock(vmf->pte, vmf->ptl);
tmp = do_page_mkwrite(vmf);
if (unlikely(!tmp || (tmp &
-			      (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))) {
+			      (VM_FAULT_ERROR | VM_FAULT_NOPAGE |
+			       VM_FAULT_RETRY)))) {
put_page(vmf->page);
return tmp;
}
@@ -3601,7 +3602,8 @@ static vm_fault_t do_shared_fault(struct vm_fault *vmf)
unlock_page(vmf->page);
tmp = do_page_mkwrite(vmf);
if (unlikely(!tmp ||
-		     (tmp & (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))) {
+		     (tmp & (VM_FAULT_ERROR | VM_FAULT_NOPAGE |
+			     VM_FAULT_RETRY)))) {
put_page(vmf->page);
return tmp;
}
-- 
2.20.1


[PATCH v7 0/9] Emulated coherent graphics memory

2019-06-17 Thread VMware
Planning to merge this through the drm/vmwgfx tree soon, so if there
are any objections, please speak up.

Graphics APIs like OpenGL 4.4 and Vulkan require the graphics driver
to provide coherent graphics memory, meaning that the GPU sees any
content written to the coherent memory on the next GPU operation that
touches that memory, and the CPU sees any content written by the GPU
to that memory immediately after any fence object trailing the GPU
operation has signaled.

Paravirtual drivers that otherwise require explicit synchronization
need to do this by hooking up dirty tracking to pagefault handlers
and buffer object validation. This is a first attempt to do that for
the vmwgfx driver.

The mm patches have been out for RFC. I think I have addressed all the
feedback I got, except a possible softdirty breakage. But although the
dirty-tracking and softdirty may write-protect PTEs both care about,
that shouldn't really cause any operation interference. In particular
since we use the hardware dirty PTE bits and softdirty uses other PTE bits.

For the TTM changes they are hopefully in line with the long-term
strategy of making helpers out of what's left of TTM.

The code has been tested and exercised by a tailored version of mesa
where we disable all explicit synchronization and assume graphics memory
is coherent. The performance loss varies of course; a typical number is
around 5%.

Changes v1-v2:
- Addressed a number of typos and formatting issues.
- Added a usage warning for apply_to_pfn_range() and apply_to_page_range()
- Re-evaluated the decision to use apply_to_pfn_range() rather than
  modifying the pagewalk.c. It still looks like generically handling the
  transparent huge page cases requires the mmap_sem to be held at least
  in read mode, so sticking with apply_to_pfn_range() for now.
- The TTM page-fault helper vma copy argument was scratched in favour of
  a pageprot_t argument.
Changes v3:
- Adapted to upstream API changes.
Changes v4:
- Adapted to upstream mmu_notifier changes. (Jerome?)
- Fixed a couple of warnings on 32-bit x86
- Fixed image offset computation on multisample images.
Changes v5:
- Updated usage warning in patch 3/9 after review comments from Nadav Amit.
Changes v6:
- Updated exports of new functionality in patch 3/9 to EXPORT_SYMBOL_GPL
  after review comments from Christoph Hellwig.
Changes v7:
- Re-added removed comment in ttm_bo_vm.c (Review by Hillf Danton)
- Fixed an error path regression in ttm_bo_vm.c
  
Cc: Andrew Morton 
Cc: Matthew Wilcox 
Cc: Will Deacon 
Cc: Peter Zijlstra 
Cc: Rik van Riel 
Cc: Minchan Kim 
Cc: Michal Hocko 
Cc: Huang Ying 
Cc: Souptick Joarder 
Cc: "Jérôme Glisse" 
Cc: "Christian König" 
Cc: linux...@kvack.org

[PATCH v7 9/9] drm/vmwgfx: Add surface dirty-tracking callbacks

2019-06-17 Thread VMware
From: Thomas Hellstrom 

Add the callbacks necessary to implement emulated coherent memory for
surfaces. Add a flag to the gb_surface_create ioctl to indicate that
surface memory should be coherent.
Also bump the drm minor version to signal the availability of coherent
surfaces.

Signed-off-by: Thomas Hellstrom 
Reviewed-by: Deepak Rawat 
---
 .../device_include/svga3d_surfacedefs.h   | 233 ++-
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.h   |   4 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_surface.c   | 395 +-
 include/uapi/drm/vmwgfx_drm.h |   4 +-
 4 files changed, 629 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/vmwgfx/device_include/svga3d_surfacedefs.h b/drivers/gpu/drm/vmwgfx/device_include/svga3d_surfacedefs.h
index f2bfd3d80598..61414f105c67 100644
--- a/drivers/gpu/drm/vmwgfx/device_include/svga3d_surfacedefs.h
+++ b/drivers/gpu/drm/vmwgfx/device_include/svga3d_surfacedefs.h
@@ -1280,7 +1280,6 @@ svga3dsurface_get_pixel_offset(SVGA3dSurfaceFormat format,
return offset;
 }
 
-
 static inline u32
 svga3dsurface_get_image_offset(SVGA3dSurfaceFormat format,
   surf_size_struct baseLevelSize,
@@ -1375,4 +1374,236 @@ svga3dsurface_is_screen_target_format(SVGA3dSurfaceFormat format)
return svga3dsurface_is_dx_screen_target_format(format);
 }
 
+/**
+ * struct svga3dsurface_mip - Mipmap level information
+ * @bytes: Bytes required in the backing store of this mipmap level.
+ * @img_stride: Byte stride per image.
+ * @row_stride: Byte stride per block row.
+ * @size: The size of the mipmap.
+ */
+struct svga3dsurface_mip {
+   size_t bytes;
+   size_t img_stride;
+   size_t row_stride;
+   struct drm_vmw_size size;
+
+};
+
+/**
+ * struct svga3dsurface_cache - Cached surface information
+ * @desc: Pointer to the surface descriptor
+ * @mip: Array of mipmap level information. Valid size is @num_mip_levels.
+ * @mip_chain_bytes: Bytes required in the backing store for the whole chain
+ * of mip levels.
+ * @sheet_bytes: Bytes required in the backing store for a sheet
+ * representing a single sample.
+ * @num_mip_levels: Valid size of the @mip array. Number of mipmap levels in
+ * a chain.
+ * @num_layers: Number of slices in an array texture or number of faces in
+ * a cubemap texture.
+ */
+struct svga3dsurface_cache {
+   const struct svga3d_surface_desc *desc;
+   struct svga3dsurface_mip mip[DRM_VMW_MAX_MIP_LEVELS];
+   size_t mip_chain_bytes;
+   size_t sheet_bytes;
+   u32 num_mip_levels;
+   u32 num_layers;
+};
+
+/**
+ * struct svga3dsurface_loc - Surface location
+ * @sub_resource: Surface subresource. Defined as layer * num_mip_levels +
+ * mip_level.
+ * @x: X coordinate.
+ * @y: Y coordinate.
+ * @z: Z coordinate.
+ */
+struct svga3dsurface_loc {
+   u32 sub_resource;
+   u32 x, y, z;
+};
+
+/**
+ * svga3dsurface_subres - Compute the subresource from layer and mipmap.
+ * @cache: Surface layout data.
+ * @mip_level: The mipmap level.
+ * @layer: The surface layer (face or array slice).
+ *
+ * Return: The subresource.
+ */
+static inline u32 svga3dsurface_subres(const struct svga3dsurface_cache *cache,
+  u32 mip_level, u32 layer)
+{
+   return cache->num_mip_levels * layer + mip_level;
+}
+
+/**
+ * svga3dsurface_setup_cache - Build a surface cache entry
+ * @size: The surface base level dimensions.
+ * @format: The surface format.
+ * @num_mip_levels: Number of mipmap levels.
+ * @num_layers: Number of layers.
+ * @num_samples: Number of samples per pixel.
+ * @cache: Pointer to a struct svga3dsurface_cache object to be filled in.
+ *
+ * Return: Zero on success, -EINVAL on invalid surface layout.
+ */
+static inline int svga3dsurface_setup_cache(const struct drm_vmw_size *size,
+   SVGA3dSurfaceFormat format,
+   u32 num_mip_levels,
+   u32 num_layers,
+   u32 num_samples,
+   struct svga3dsurface_cache *cache)
+{
+   const struct svga3d_surface_desc *desc;
+   u32 i;
+
+   memset(cache, 0, sizeof(*cache));
+   cache->desc = desc = svga3dsurface_get_desc(format);
+   cache->num_mip_levels = num_mip_levels;
+   cache->num_layers = num_layers;
+   for (i = 0; i < cache->num_mip_levels; i++) {
+   struct svga3dsurface_mip *mip = &cache->mip[i];
+
+   mip->size = svga3dsurface_get_mip_size(*size, i);
+   mip->bytes = svga3dsurface_get_image_buffer_size
+   (desc, &mip->size, 0);
+   mip->row_stride =
+   __KERNEL_DIV_ROUND_UP(mip->size.width,
+ desc->block_size.width) *
+   desc->bytes_per_block * num_samples;
+   if (!mip->row_stride)
+   goto invalid_di

[PATCH v7 3/9] mm: Add write-protect and clean utilities for address space ranges

2019-06-17 Thread VMware
From: Thomas Hellstrom 

Add two utilities to a) write-protect and b) clean all ptes pointing into
a range of an address space.
The utilities are intended to aid in tracking dirty pages (either
driver-allocated system memory or pci device memory).
The write-protect utility should be used in conjunction with
page_mkwrite() and pfn_mkwrite() to trigger write page-faults on page
accesses. Typically one would want to use this on sparse accesses into
large memory regions. The clean utility should be used to utilize
hardware dirtying functionality and avoid the overhead of page-faults,
typically on large accesses into small memory regions.

The added file "as_dirty_helpers.c" is initially listed as maintained by
VMware under our DRM driver. If somebody would like it elsewhere,
that's of course no problem.

Notable changes since RFC:
- Added comments to help avoid the usage of these functions for VMAs
  they're not intended for. We also do advisory checks on the vm_flags and
  warn on illegal usage.
- Perform the pte modifications the same way softdirty does.
- Add mmu_notifier range invalidation calls.
- Add a config option so that this code is not unconditionally included.
- Tell the mmu_gather code about pending tlb flushes.

Cc: Andrew Morton 
Cc: Matthew Wilcox 
Cc: Will Deacon 
Cc: Peter Zijlstra 
Cc: Rik van Riel 
Cc: Minchan Kim 
Cc: Michal Hocko 
Cc: Huang Ying 
Cc: Souptick Joarder 
Cc: "Jérôme Glisse" 
Cc: linux...@kvack.org
Cc: linux-ker...@vger.kernel.org

Signed-off-by: Thomas Hellstrom 
Reviewed-by: Ralph Campbell  #v1
---
 MAINTAINERS   |   1 +
 include/linux/mm.h|   9 +-
 mm/Kconfig|   3 +
 mm/Makefile   |   1 +
 mm/as_dirty_helpers.c | 300 ++
 5 files changed, 313 insertions(+), 1 deletion(-)
 create mode 100644 mm/as_dirty_helpers.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 7a2f487ea49a..a55d4ef91b0b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5179,6 +5179,7 @@ T: git git://people.freedesktop.org/~thomash/linux
 S: Supported
 F: drivers/gpu/drm/vmwgfx/
 F: include/uapi/drm/vmwgfx_drm.h
+F: mm/as_dirty_helpers.c
 
 DRM DRIVERS
 M: David Airlie 
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 3d06ce2a64af..a0bc2a82917e 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2685,7 +2685,14 @@ struct pfn_range_apply {
 };
 extern int apply_to_pfn_range(struct pfn_range_apply *closure,
  unsigned long address, unsigned long size);
-
+unsigned long apply_as_wrprotect(struct address_space *mapping,
+pgoff_t first_index, pgoff_t nr);
+unsigned long apply_as_clean(struct address_space *mapping,
+pgoff_t first_index, pgoff_t nr,
+pgoff_t bitmap_pgoff,
+unsigned long *bitmap,
+pgoff_t *start,
+pgoff_t *end);
 #ifdef CONFIG_PAGE_POISONING
 extern bool page_poisoning_enabled(void);
 extern void kernel_poison_pages(struct page *page, int numpages, int enable);
diff --git a/mm/Kconfig b/mm/Kconfig
index f0c76ba47695..5006d0e6a5c7 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -765,4 +765,7 @@ config GUP_BENCHMARK
 config ARCH_HAS_PTE_SPECIAL
bool
 
+config AS_DIRTY_HELPERS
+bool
+
 endmenu
diff --git a/mm/Makefile b/mm/Makefile
index ac5e5ba78874..f5d412bbc2f7 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -104,3 +104,4 @@ obj-$(CONFIG_HARDENED_USERCOPY) += usercopy.o
 obj-$(CONFIG_PERCPU_STATS) += percpu-stats.o
 obj-$(CONFIG_HMM) += hmm.o
 obj-$(CONFIG_MEMFD_CREATE) += memfd.o
+obj-$(CONFIG_AS_DIRTY_HELPERS) += as_dirty_helpers.o
diff --git a/mm/as_dirty_helpers.c b/mm/as_dirty_helpers.c
new file mode 100644
index ..f600e31534fb
--- /dev/null
+++ b/mm/as_dirty_helpers.c
@@ -0,0 +1,300 @@
+// SPDX-License-Identifier: GPL-2.0
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+/**
+ * struct apply_as - Closure structure for apply_as_range
+ * @base: struct pfn_range_apply we derive from
+ * @start: Address of first modified pte
+ * @end: Address of last modified pte + 1
+ * @total: Total number of modified ptes
+ * @vma: Pointer to the struct vm_area_struct we're currently operating on
+ */
+struct apply_as {
+   struct pfn_range_apply base;
+   unsigned long start;
+   unsigned long end;
+   unsigned long total;
+   struct vm_area_struct *vma;
+};
+
+/**
+ * apply_pt_wrprotect - Leaf pte callback to write-protect a pte
+ * @pte: Pointer to the pte
+ * @token: Page table token, see apply_to_pfn_range()
+ * @addr: The virtual page address
+ * @closure: Pointer to a struct pfn_range_apply embedded in a
+ * struct apply_as
+ *
+ * The function write-protects a pte and records the range in
+ * virtual address space of touched ptes for efficient range TLB flushes.
+ *
+ * Return: Always zero.

[PATCH v7 8/9] drm/vmwgfx: Implement an infrastructure for read-coherent resources

2019-06-17 Thread VMware
From: Thomas Hellstrom 

Similar to write-coherent resources, make sure that from the user-space
point of view, GPU-rendered content is automatically available for
reading by the CPU.

Signed-off-by: Thomas Hellstrom 
Reviewed-by: Deepak Rawat 
---
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.h   |   7 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c|  73 -
 drivers/gpu/drm/vmwgfx/vmwgfx_resource.c  | 103 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_resource_priv.h |   2 +
 drivers/gpu/drm/vmwgfx/vmwgfx_validation.c|   3 +-
 5 files changed, 177 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index 9b347923196f..dae3a39bf402 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -689,7 +689,8 @@ extern void vmw_resource_unreference(struct vmw_resource **p_res);
 extern struct vmw_resource *vmw_resource_reference(struct vmw_resource *res);
 extern struct vmw_resource *
 vmw_resource_reference_unless_doomed(struct vmw_resource *res);
-extern int vmw_resource_validate(struct vmw_resource *res, bool intr);
+extern int vmw_resource_validate(struct vmw_resource *res, bool intr,
+bool dirtying);
 extern int vmw_resource_reserve(struct vmw_resource *res, bool interruptible,
bool no_backup);
 extern bool vmw_resource_needs_backup(const struct vmw_resource *res);
@@ -733,6 +734,8 @@ void vmw_resource_mob_attach(struct vmw_resource *res);
 void vmw_resource_mob_detach(struct vmw_resource *res);
 void vmw_resource_dirty_update(struct vmw_resource *res, pgoff_t start,
   pgoff_t end);
+int vmw_resources_clean(struct vmw_buffer_object *vbo, pgoff_t start,
+   pgoff_t end, pgoff_t *num_prefault);
 
 /**
  * vmw_resource_mob_attached - Whether a resource currently has a mob attached
@@ -1427,6 +1430,8 @@ int vmw_bo_dirty_add(struct vmw_buffer_object *vbo);
 void vmw_bo_dirty_transfer_to_res(struct vmw_resource *res);
 void vmw_bo_dirty_clear_res(struct vmw_resource *res);
 void vmw_bo_dirty_release(struct vmw_buffer_object *vbo);
+void vmw_bo_dirty_unmap(struct vmw_buffer_object *vbo,
+   pgoff_t start, pgoff_t end);
 vm_fault_t vmw_bo_vm_fault(struct vm_fault *vmf);
 vm_fault_t vmw_bo_vm_mkwrite(struct vm_fault *vmf);
 
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c b/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c
index 8d154f90bdc0..730c51e397dd 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c
@@ -153,7 +153,6 @@ static void vmw_bo_dirty_scan_mkwrite(struct vmw_buffer_object *vbo)
}
 }
 
-
 /**
  * vmw_bo_dirty_scan - Scan for dirty pages and add them to the dirty
  * tracking structure
@@ -171,6 +170,51 @@ void vmw_bo_dirty_scan(struct vmw_buffer_object *vbo)
vmw_bo_dirty_scan_mkwrite(vbo);
 }
 
+/**
+ * vmw_bo_dirty_pre_unmap - write-protect and pick up dirty pages before
+ * an unmap_mapping_range operation.
+ * @vbo: The buffer object.
+ * @start: First page of the range within the buffer object.
+ * @end: Last page of the range within the buffer object + 1.
+ *
+ * If we're using the _PAGETABLE scan method, we may leak dirty pages
+ * when calling unmap_mapping_range(). This function makes sure we pick
+ * up all dirty pages.
+ */
+static void vmw_bo_dirty_pre_unmap(struct vmw_buffer_object *vbo,
+  pgoff_t start, pgoff_t end)
+{
+   struct vmw_bo_dirty *dirty = vbo->dirty;
+   unsigned long offset = drm_vma_node_start(&vbo->base.vma_node);
+   struct address_space *mapping = vbo->base.bdev->dev_mapping;
+
+   if (dirty->method != VMW_BO_DIRTY_PAGETABLE || start >= end)
+   return;
+
+   apply_as_wrprotect(mapping, start + offset, end - start);
+   apply_as_clean(mapping, start + offset, end - start, offset,
+  &dirty->bitmap[0], &dirty->start, &dirty->end);
+}
+
+/**
+ * vmw_bo_dirty_unmap - Clear all ptes pointing to a range within a bo
+ * @vbo: The buffer object.
+ * @start: First page of the range within the buffer object.
+ * @end: Last page of the range within the buffer object + 1.
+ *
+ * This is similar to ttm_bo_unmap_virtual_locked() except it takes a subrange.
+ */
+void vmw_bo_dirty_unmap(struct vmw_buffer_object *vbo,
+   pgoff_t start, pgoff_t end)
+{
+   unsigned long offset = drm_vma_node_start(&vbo->base.vma_node);
+   struct address_space *mapping = vbo->base.bdev->dev_mapping;
+
+   vmw_bo_dirty_pre_unmap(vbo, start, end);
+   unmap_shared_mapping_range(mapping, (offset + start) << PAGE_SHIFT,
+  (loff_t) (end - start) << PAGE_SHIFT);
+}
+
 /**
  * vmw_bo_dirty_add - Add a dirty-tracking user to a buffer object
  * @vbo: The buffer object
@@ -389,21 +433,40 @@ vm_fault_t vmw_bo_vm_fault(stru

[PATCH v7 4/9] drm/ttm: Allow the driver to provide the ttm struct vm_operations_struct

2019-06-17 Thread VMware
From: Thomas Hellstrom 

Add a pointer to the struct vm_operations_struct in the bo_device, and
assign that pointer to the default value currently used.

The driver can then optionally modify that pointer and the new value
can be used for each new vma created.
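
A sketch of a driver override, done after ttm_bo_device_init() and
before the first mmap() (the my_* names are illustrative):

static vm_fault_t my_bo_vm_fault(struct vm_fault *vmf);
static vm_fault_t my_bo_vm_mkwrite(struct vm_fault *vmf);
static struct vm_operations_struct my_vm_ops;

static void my_setup_vm_ops(struct ttm_bo_device *bdev)
{
	my_vm_ops = *bdev->vm_ops;	/* start from the TTM defaults */
	my_vm_ops.fault = my_bo_vm_fault;
	my_vm_ops.page_mkwrite = my_bo_vm_mkwrite;
	my_vm_ops.pfn_mkwrite = my_bo_vm_mkwrite;
	bdev->vm_ops = &my_vm_ops;
}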

Cc: "Christian König" 

Signed-off-by: Thomas Hellstrom 
Reviewed-by: Christian König 
---
 drivers/gpu/drm/ttm/ttm_bo.c| 1 +
 drivers/gpu/drm/ttm/ttm_bo_vm.c | 6 +++---
 include/drm/ttm/ttm_bo_driver.h | 6 ++
 3 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index c7de667d482a..6953dd264172 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -1739,6 +1739,7 @@ int ttm_bo_device_init(struct ttm_bo_device *bdev,
mutex_lock(&ttm_global_mutex);
list_add_tail(&bdev->device_list, &glob->device_list);
mutex_unlock(&ttm_global_mutex);
+   bdev->vm_ops = &ttm_bo_vm_ops;
 
return 0;
 out_no_sys:
diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
index 6dacff49c1cc..196e13a0adad 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -395,7 +395,7 @@ static int ttm_bo_vm_access(struct vm_area_struct *vma, unsigned long addr,
return ret;
 }
 
-static const struct vm_operations_struct ttm_bo_vm_ops = {
+const struct vm_operations_struct ttm_bo_vm_ops = {
.fault = ttm_bo_vm_fault,
.open = ttm_bo_vm_open,
.close = ttm_bo_vm_close,
@@ -448,7 +448,7 @@ int ttm_bo_mmap(struct file *filp, struct vm_area_struct *vma,
if (unlikely(ret != 0))
goto out_unref;
 
-   vma->vm_ops = &ttm_bo_vm_ops;
+   vma->vm_ops = bdev->vm_ops;
 
/*
 * Note: We're transferring the bo reference to
@@ -480,7 +480,7 @@ int ttm_fbdev_mmap(struct vm_area_struct *vma, struct ttm_buffer_object *bo)
 
ttm_bo_get(bo);
 
-   vma->vm_ops = &ttm_bo_vm_ops;
+   vma->vm_ops = bo->bdev->vm_ops;
vma->vm_private_data = bo;
vma->vm_flags |= VM_MIXEDMAP;
vma->vm_flags |= VM_IO | VM_DONTEXPAND;
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index c9b8ba492f24..a2d810a2504d 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -442,6 +442,9 @@ extern struct ttm_bo_global {
  * @driver: Pointer to a struct ttm_bo_driver struct setup by the driver.
  * @man: An array of mem_type_managers.
  * @vma_manager: Address space manager
+ * @vm_ops: Pointer to the struct vm_operations_struct used for this
+ * device's VM operations. The driver may override this before the first
+ * mmap() call.
  * lru_lock: Spinlock that protects the buffer+device lru lists and
  * ddestroy lists.
  * @dev_mapping: A pointer to the struct address_space representing the
@@ -460,6 +463,7 @@ struct ttm_bo_device {
struct ttm_bo_global *glob;
struct ttm_bo_driver *driver;
struct ttm_mem_type_manager man[TTM_NUM_MEM_TYPES];
+   const struct vm_operations_struct *vm_ops;
 
/*
 * Protected by internal locks.
@@ -488,6 +492,8 @@ struct ttm_bo_device {
bool no_retry;
 };
 
+extern const struct vm_operations_struct ttm_bo_vm_ops;
+
 /**
  * struct ttm_lru_bulk_move_pos
  *
-- 
2.20.1


[PATCH v7 5/9] drm/ttm: TTM fault handler helpers

2019-06-17 Thread VMware
From: Thomas Hellstrom 

With the vmwgfx dirty tracking, the default TTM fault handler is not
completely sufficient (vmwgfx needs to modify the vma->vm_flags member,
and also needs to restrict the number of prefaults).

We also want to replicate the new ttm_bo_vm_reserve() functionality.

So start turning the TTM vm code into helpers: ttm_bo_vm_fault_reserved()
and ttm_bo_vm_reserve(), and provide a default TTM fault handler for other
drivers to use.
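
As a sketch of the intended composition in a driver fault handler
(the prefault constant and the my_* name are illustrative, not part
of this patch):

static vm_fault_t my_bo_vm_fault(struct vm_fault *vmf)
{
	struct ttm_buffer_object *bo = vmf->vma->vm_private_data;
	pgprot_t prot;
	vm_fault_t ret;

	ret = ttm_bo_vm_reserve(bo, vmf);
	if (ret)
		return ret;

	prot = vm_get_page_prot(vmf->vma->vm_flags);
	ret = ttm_bo_vm_fault_reserved(vmf, prot, 16 /* prefault policy */);
	if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
		return ret;	/* locks already released on the retry path */

	reservation_object_unlock(bo->resv);
	return ret;
}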

Cc: "Christian König" 

Signed-off-by: Thomas Hellstrom 
Reviewed-by: "Christian König"  #v1
---
 drivers/gpu/drm/ttm/ttm_bo_vm.c | 163 
 include/drm/ttm/ttm_bo_api.h|  10 ++
 2 files changed, 111 insertions(+), 62 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
index 196e13a0adad..0c4576cbafcf 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -42,8 +42,6 @@
 #include 
 #include 
 
-#define TTM_BO_VM_NUM_PREFAULT 16
-
 static vm_fault_t ttm_bo_vm_fault_idle(struct ttm_buffer_object *bo,
struct vm_fault *vmf)
 {
@@ -106,25 +104,30 @@ static unsigned long ttm_bo_io_mem_pfn(struct ttm_buffer_object *bo,
+ page_offset;
 }
 
-static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
+/**
+ * ttm_bo_vm_reserve - Reserve a buffer object in a retryable vm callback
+ * @bo: The buffer object
+ * @vmf: The fault structure handed to the callback
+ *
+ * vm callbacks like fault() and *_mkwrite() allow for the mm_sem to be dropped
+ * during long waits, and after the wait the callback will be restarted. This
+ * is to allow other threads using the same virtual memory space concurrent
+ * access to map(), unmap() completely unrelated buffer objects. TTM buffer
+ * object reservations sometimes wait for GPU and should therefore be
+ * considered long waits. This function reserves the buffer object interruptibly
+ * taking this into account. Starvation is avoided by the vm system not
+ * allowing too many repeated restarts.
+ * This function is intended to be used in customized fault() and _mkwrite()
+ * handlers.
+ *
+ * Return:
+ *0 on success and the bo was reserved.
+ *VM_FAULT_RETRY if blocking wait.
+ *VM_FAULT_NOPAGE if blocking wait and retrying was not allowed.
+ */
+vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo,
+struct vm_fault *vmf)
 {
-   struct vm_area_struct *vma = vmf->vma;
-   struct ttm_buffer_object *bo = (struct ttm_buffer_object *)
-   vma->vm_private_data;
-   struct ttm_bo_device *bdev = bo->bdev;
-   unsigned long page_offset;
-   unsigned long page_last;
-   unsigned long pfn;
-   struct ttm_tt *ttm = NULL;
-   struct page *page;
-   int err;
-   int i;
-   vm_fault_t ret = VM_FAULT_NOPAGE;
-   unsigned long address = vmf->address;
-   struct ttm_mem_type_manager *man =
-   &bdev->man[bo->mem.mem_type];
-   struct vm_area_struct cvma;
-
/*
 * Work around locking order reversal in fault / nopfn
 * between mmap_sem and bo_reserve: Perform a trylock operation
@@ -151,14 +154,55 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
return VM_FAULT_NOPAGE;
}
 
+   return 0;
+}
+EXPORT_SYMBOL(ttm_bo_vm_reserve);
+
+/**
+ * ttm_bo_vm_fault_reserved - TTM fault helper
+ * @vmf: The struct vm_fault given as argument to the fault callback
+ * @prot: The page protection to be used for this memory area.
+ * @num_prefault: Maximum number of prefault pages. The caller may want to
+ * specify this based on madvice settings and the size of the GPU object
+ * backed by the memory.
+ *
+ * This function inserts one or more page table entries pointing to the
+ * memory backing the buffer object, and then returns a return code
+ * instructing the caller to retry the page access.
+ *
+ * Return:
+ *   VM_FAULT_NOPAGE on success or pending signal
+ *   VM_FAULT_SIGBUS on unspecified error
+ *   VM_FAULT_OOM on out-of-memory
+ *   VM_FAULT_RETRY if retryable wait
+ */
+vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf,
+   pgprot_t prot,
+   pgoff_t num_prefault)
+{
+   struct vm_area_struct *vma = vmf->vma;
+   struct vm_area_struct cvma = *vma;
+   struct ttm_buffer_object *bo = (struct ttm_buffer_object *)
+   vma->vm_private_data;
+   struct ttm_bo_device *bdev = bo->bdev;
+   unsigned long page_offset;
+   unsigned long page_last;
+   unsigned long pfn;
+   struct ttm_tt *ttm = NULL;
+   struct page *page;
+   int err;
+   pgoff_t i;
+   vm_fault_t ret = VM_FAULT_NOPAGE;
+   unsigned long address = vmf->address;
+   struct ttm_mem_type_manager *man =
+   &bdev->man[bo->mem.mem_type];
+
/*
 * Refuse to fault imported pages. This should be handled
 * (if at all

[PATCH v7 1/9] mm: Allow the [page|pfn]_mkwrite callbacks to drop the mmap_sem

2019-06-17 Thread VMware
From: Thomas Hellstrom 

Driver fault callbacks are allowed to drop the mmap_sem when expecting
long hardware waits to avoid blocking other mm users. Allow the mkwrite
callbacks to do the same by returning early on VM_FAULT_RETRY.

In particular we want to be able to drop the mmap_sem when waiting for
a reservation object lock on a GPU buffer object. These locks may be
held while waiting for the GPU.
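
As an illustration (not part of this patch), a driver pfn_mkwrite()
callback could use the new retry path roughly as below. All my_drv_*
names are hypothetical, and a real handler must also hold a reference
on the object across the mmap_sem drop, like TTM's fault handler does:

	static vm_fault_t my_drv_pfn_mkwrite(struct vm_fault *vmf)
	{
		struct my_drv_bo *bo = vmf->vma->vm_private_data;

		if (!my_drv_bo_trylock(bo)) {
			if ((vmf->flags & FAULT_FLAG_ALLOW_RETRY) &&
			    !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) {
				/* The core mm now honors VM_FAULT_RETRY here. */
				up_read(&vmf->vma->vm_mm->mmap_sem);
				my_drv_bo_wait_unlocked(bo); /* long wait */
				return VM_FAULT_RETRY;
			}
			my_drv_bo_lock(bo); /* Retry not allowed: block. */
		}
		/* ... make the underlying pfn writable ... */
		my_drv_bo_unlock(bo);
		return 0;
	}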

Cc: Andrew Morton 
Cc: Matthew Wilcox 
Cc: Will Deacon 
Cc: Peter Zijlstra 
Cc: Rik van Riel 
Cc: Minchan Kim 
Cc: Michal Hocko 
Cc: Huang Ying 
Cc: Souptick Joarder 
Cc: "Jérôme Glisse" 
Cc: linux...@kvack.org
Cc: linux-ker...@vger.kernel.org

Signed-off-by: Thomas Hellstrom 
Reviewed-by: Ralph Campbell 
---
 mm/memory.c | 10 ++
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/mm/memory.c b/mm/memory.c
index ddf20bd0c317..168f546af1ad 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2238,7 +2238,7 @@ static vm_fault_t do_page_mkwrite(struct vm_fault *vmf)
ret = vmf->vma->vm_ops->page_mkwrite(vmf);
/* Restore original flags so that caller is not surprised */
vmf->flags = old_flags;
-   if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))
+   if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
return ret;
if (unlikely(!(ret & VM_FAULT_LOCKED))) {
lock_page(page);
@@ -2515,7 +2515,7 @@ static vm_fault_t wp_pfn_shared(struct vm_fault *vmf)
pte_unmap_unlock(vmf->pte, vmf->ptl);
vmf->flags |= FAULT_FLAG_MKWRITE;
ret = vma->vm_ops->pfn_mkwrite(vmf);
-   if (ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE))
+   if (ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))
return ret;
return finish_mkwrite_fault(vmf);
}
@@ -2536,7 +2536,8 @@ static vm_fault_t wp_page_shared(struct vm_fault *vmf)
pte_unmap_unlock(vmf->pte, vmf->ptl);
tmp = do_page_mkwrite(vmf);
if (unlikely(!tmp || (tmp &
- (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))) {
+ (VM_FAULT_ERROR | VM_FAULT_NOPAGE |
+  VM_FAULT_RETRY)))) {
put_page(vmf->page);
return tmp;
}
@@ -3601,7 +3602,8 @@ static vm_fault_t do_shared_fault(struct vm_fault *vmf)
unlock_page(vmf->page);
tmp = do_page_mkwrite(vmf);
if (unlikely(!tmp ||
-   (tmp & (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))) {
+   (tmp & (VM_FAULT_ERROR | VM_FAULT_NOPAGE |
+   VM_FAULT_RETRY)))) {
put_page(vmf->page);
return tmp;
}
-- 
2.20.1


[PATCH v7 7/9] drm/vmwgfx: Use an RBtree instead of linked list for MOB resources

2019-06-17 Thread VMware
From: Thomas Hellstrom 

With emulated coherent memory we need to be able to quickly look up
a resource from the MOB offset. Instead of traversing a linked list with
O(n) worst case, use an RBtree with O(log n) worst case complexity.
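
For reference, looking a resource up by offset on the resulting tree is
then the usual O(log n) descent. The helper below only illustrates that
pattern; it is not part of this patch and ignores the case of several
resources sharing the same backup offset:

	static struct vmw_resource *
	vmw_res_lookup_by_offset(struct vmw_buffer_object *backup,
				 unsigned long backup_offset)
	{
		struct rb_node *node = backup->res_tree.rb_node;

		while (node) {
			struct vmw_resource *res =
				container_of(node, struct vmw_resource,
					     mob_node);

			if (backup_offset < res->backup_offset)
				node = node->rb_left;
			else if (backup_offset > res->backup_offset)
				node = node->rb_right;
			else
				return res;
		}

		return NULL;
	}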

Signed-off-by: Thomas Hellstrom 
Reviewed-by: Deepak Rawat 
---
 drivers/gpu/drm/vmwgfx/vmwgfx_bo.c   |  5 ++--
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.h  | 10 +++
 drivers/gpu/drm/vmwgfx/vmwgfx_resource.c | 33 +---
 3 files changed, 32 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
index 90ca866640fe..e8bc7a7ac031 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
@@ -464,6 +464,7 @@ void vmw_bo_bo_free(struct ttm_buffer_object *bo)
struct vmw_buffer_object *vmw_bo = vmw_buffer_object(bo);
 
WARN_ON(vmw_bo->dirty);
+   WARN_ON(!RB_EMPTY_ROOT(&vmw_bo->res_tree));
vmw_bo_unmap(vmw_bo);
kfree(vmw_bo);
 }
@@ -480,6 +481,7 @@ static void vmw_user_bo_destroy(struct ttm_buffer_object 
*bo)
struct vmw_buffer_object *vbo = &vmw_user_bo->vbo;
 
WARN_ON(vbo->dirty);
+   WARN_ON(!RB_EMPTY_ROOT(&vbo->res_tree));
vmw_bo_unmap(vbo);
ttm_prime_object_kfree(vmw_user_bo, prime);
 }
@@ -515,8 +517,7 @@ int vmw_bo_init(struct vmw_private *dev_priv,
memset(vmw_bo, 0, sizeof(*vmw_bo));
BUILD_BUG_ON(TTM_MAX_BO_PRIORITY <= 3);
vmw_bo->base.priority = 3;
-
-   INIT_LIST_HEAD(&vmw_bo->res_list);
+   vmw_bo->res_tree = RB_ROOT;
 
ret = ttm_bo_init(bdev, &vmw_bo->base, size,
  ttm_bo_type_device, placement,
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h 
b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index 27c259395790..9b347923196f 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -89,7 +89,7 @@ struct vmw_fpriv {
 /**
  * struct vmw_buffer_object - TTM buffer object with vmwgfx additions
  * @base: The TTM buffer object
- * @res_list: List of resources using this buffer object as a backing MOB
+ * @res_tree: RB tree of resources using this buffer object as a backing MOB
  * @pin_count: pin depth
  * @dx_query_ctx: DX context if this buffer object is used as a DX query MOB
  * @map: Kmap object for semi-persistent mappings
@@ -98,7 +98,7 @@ struct vmw_fpriv {
  */
 struct vmw_buffer_object {
struct ttm_buffer_object base;
-   struct list_head res_list;
+   struct rb_root res_tree;
s32 pin_count;
/* Not ref-counted.  Protected by binding_mutex */
struct vmw_resource *dx_query_ctx;
@@ -146,8 +146,8 @@ struct vmw_res_func;
  * pin-count greater than zero. It is not on the resource LRU lists and its
  * backup buffer is pinned. Hence it can't be evicted.
  * @func: Method vtable for this resource. Immutable.
+ * @mob_node: Node for the MOB backup rbtree. Protected by @backup reserved.
  * @lru_head: List head for the LRU list. Protected by 
@dev_priv::resource_lock.
- * @mob_head: List head for the MOB backup list. Protected by @backup reserved.
  * @binding_head: List head for the context binding list. Protected by
  * the @dev_priv::binding_mutex
  * @res_free: The resource destructor.
@@ -168,8 +168,8 @@ struct vmw_resource {
unsigned long backup_offset;
unsigned long pin_count;
const struct vmw_res_func *func;
+   struct rb_node mob_node;
struct list_head lru_head;
-   struct list_head mob_head;
struct list_head binding_head;
struct vmw_resource_dirty *dirty;
void (*res_free) (struct vmw_resource *res);
@@ -742,7 +742,7 @@ void vmw_resource_dirty_update(struct vmw_resource *res, 
pgoff_t start,
  */
 static inline bool vmw_resource_mob_attached(const struct vmw_resource *res)
 {
-   return !list_empty(&res->mob_head);
+   return !RB_EMPTY_NODE(&res->mob_node);
 }
 
 /**
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
index da9afa83fb4f..b84dd5953886 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
@@ -41,11 +41,24 @@
 void vmw_resource_mob_attach(struct vmw_resource *res)
 {
struct vmw_buffer_object *backup = res->backup;
+   struct rb_node **new = &backup->res_tree.rb_node, *parent = NULL;
 
lockdep_assert_held(&backup->base.resv->lock.base);
res->used_prio = (res->res_dirty) ? res->func->dirty_prio :
res->func->prio;
-   list_add_tail(&res->mob_head, &backup->res_list);
+
+   while (*new) {
+   struct vmw_resource *this =
+   container_of(*new, struct vmw_resource, mob_node);
+
+   parent = *new;
+   new = (res->backup_offset < this->backup_offset) ?
+   &((*new)->rb_left) : &((*new)->rb_right);
+   }
+
+   rb_link_node(&res->mob_node, parent, new);
+  

[PATCH v7 6/9] drm/vmwgfx: Implement an infrastructure for write-coherent resources

2019-06-17 Thread VMware
 Emulate coherency by tracking vm accesses.
  * @backup: The backup buffer if any. Protected by resource reserved.
  * @backup_offset: Offset into the backup buffer if any. Protected by resource
  * reserved. Note that only a few resource types can have a @backup_offset
@@ -151,14 +154,16 @@ struct vmw_res_func;
  * @hw_destroy: Callback to destroy the resource on the device, as part of
  * resource destruction.
  */
+struct vmw_resource_dirty;
 struct vmw_resource {
struct kref kref;
struct vmw_private *dev_priv;
int id;
u32 used_prio;
unsigned long backup_size;
-   bool res_dirty;
-   bool backup_dirty;
+   u32 res_dirty : 1;
+   u32 backup_dirty : 1;
+   u32 coherent : 1;
struct vmw_buffer_object *backup;
unsigned long backup_offset;
unsigned long pin_count;
@@ -166,6 +171,7 @@ struct vmw_resource {
struct list_head lru_head;
struct list_head mob_head;
struct list_head binding_head;
+   struct vmw_resource_dirty *dirty;
void (*res_free) (struct vmw_resource *res);
void (*hw_destroy) (struct vmw_resource *res);
 };
@@ -606,6 +612,9 @@ struct vmw_private {
 
/* Validation memory reservation */
struct vmw_validation_mem vvm;
+
+   /* VM operations */
+   struct vm_operations_struct vm_ops;
 };
 
 static inline struct vmw_surface *vmw_res_to_srf(struct vmw_resource *res)
@@ -722,6 +731,8 @@ extern void vmw_resource_evict_all(struct vmw_private 
*dev_priv);
 extern void vmw_resource_unbind_list(struct vmw_buffer_object *vbo);
 void vmw_resource_mob_attach(struct vmw_resource *res);
 void vmw_resource_mob_detach(struct vmw_resource *res);
+void vmw_resource_dirty_update(struct vmw_resource *res, pgoff_t start,
+  pgoff_t end);
 
 /**
  * vmw_resource_mob_attached - Whether a resource currently has a mob attached
@@ -1410,6 +1421,15 @@ int vmw_host_log(const char *log);
 #define VMW_DEBUG_USER(fmt, ...)  \
DRM_DEBUG_DRIVER(fmt, ##__VA_ARGS__)
 
+/* Resource dirtying - vmwgfx_page_dirty.c */
+void vmw_bo_dirty_scan(struct vmw_buffer_object *vbo);
+int vmw_bo_dirty_add(struct vmw_buffer_object *vbo);
+void vmw_bo_dirty_transfer_to_res(struct vmw_resource *res);
+void vmw_bo_dirty_clear_res(struct vmw_resource *res);
+void vmw_bo_dirty_release(struct vmw_buffer_object *vbo);
+vm_fault_t vmw_bo_vm_fault(struct vm_fault *vmf);
+vm_fault_t vmw_bo_vm_mkwrite(struct vm_fault *vmf);
+
 /**
  * Inline helper functions
  */
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
index 33533d126277..319c1ca35663 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
@@ -2560,7 +2560,6 @@ static int vmw_cmd_dx_check_subresource(struct 
vmw_private *dev_priv,
 offsetof(typeof(*cmd), sid));
 
cmd = container_of(header, typeof(*cmd), header);
-
return vmw_cmd_res_check(dev_priv, sw_context, vmw_res_surface,
 VMW_RES_DIRTY_NONE, user_surface_converter,
 &cmd->sid, NULL);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c
new file mode 100644
index ..8d154f90bdc0
--- /dev/null
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c
@@ -0,0 +1,409 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/******
+ *
+ * Copyright 2019 VMware, Inc., Palo Alto, CA., USA
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **/
+#include "vmwgfx_drv.h"
+
+/*
+ * Different methods for tracking dirty:
+ * VMW_BO_DIRTY_PAG

[PATCH v7 2/9] mm: Add an apply_to_pfn_range interface

2019-06-17 Thread VMware
From: Thomas Hellstrom 

This is basically apply_to_page_range with added functionality:
Allocating missing parts of the page table becomes optional, which
means that the function can be guaranteed not to error if allocation
is disabled. Also, the closure struct and callback function are passed in
a way more in line with how things are done elsewhere.

Finally, we keep apply_to_page_range as a wrapper around apply_to_pfn_range.

The reason for not using the page-walk code is that we want to perform
the page-walk on vmas pointing to an address space without requiring the
mmap_sem to be held, rather than on vmas belonging to a process with the
mmap_sem held.
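
For reference, a caller embeds the closure in its own struct to pass
state to the callback. A minimal sketch using only the interface added
below; the count_writable names are made up for the example:

	struct count_writable {
		struct pfn_range_apply base;
		unsigned long count;
	};

	static int count_writable_pte(pte_t *pte, pgtable_t token,
				      unsigned long addr,
				      struct pfn_range_apply *closure)
	{
		struct count_writable *cw =
			container_of(closure, struct count_writable, base);

		if (pte_write(*pte))
			cw->count++;
		return 0;
	}

	/* Then, with the appropriate locking in place:
	 *
	 *	struct count_writable cw = {
	 *		.base = { .mm = mm, .ptefn = count_writable_pte,
	 *			  .alloc = 0 },
	 *	};
	 *	err = apply_to_pfn_range(&cw.base, start, size);
	 */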

Notable changes since RFC:
Don't export apply_to_pfn_range.

Cc: Andrew Morton 
Cc: Matthew Wilcox 
Cc: Will Deacon 
Cc: Peter Zijlstra 
Cc: Rik van Riel 
Cc: Minchan Kim 
Cc: Michal Hocko 
Cc: Huang Ying 
Cc: Souptick Joarder 
Cc: "Jérôme Glisse" 
Cc: linux...@kvack.org
Cc: linux-ker...@vger.kernel.org

Signed-off-by: Thomas Hellstrom 
Reviewed-by: Ralph Campbell  #v1
---
 include/linux/mm.h |  10 
 mm/memory.c| 135 ++---
 2 files changed, 113 insertions(+), 32 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 0e8834ac32b7..3d06ce2a64af 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2675,6 +2675,16 @@ typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, 
unsigned long addr,
 extern int apply_to_page_range(struct mm_struct *mm, unsigned long address,
   unsigned long size, pte_fn_t fn, void *data);
 
+struct pfn_range_apply;
+typedef int (*pter_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr,
+struct pfn_range_apply *closure);
+struct pfn_range_apply {
+   struct mm_struct *mm;
+   pter_fn_t ptefn;
+   unsigned int alloc;
+};
+extern int apply_to_pfn_range(struct pfn_range_apply *closure,
+ unsigned long address, unsigned long size);
 
 #ifdef CONFIG_PAGE_POISONING
 extern bool page_poisoning_enabled(void);
diff --git a/mm/memory.c b/mm/memory.c
index 168f546af1ad..462aa47f8878 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2032,18 +2032,17 @@ int vm_iomap_memory(struct vm_area_struct *vma, 
phys_addr_t start, unsigned long
 }
 EXPORT_SYMBOL(vm_iomap_memory);
 
-static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd,
-unsigned long addr, unsigned long end,
-pte_fn_t fn, void *data)
+static int apply_to_pte_range(struct pfn_range_apply *closure, pmd_t *pmd,
+ unsigned long addr, unsigned long end)
 {
pte_t *pte;
int err;
pgtable_t token;
spinlock_t *uninitialized_var(ptl);
 
-   pte = (mm == &init_mm) ?
+   pte = (closure->mm == &init_mm) ?
pte_alloc_kernel(pmd, addr) :
-   pte_alloc_map_lock(mm, pmd, addr, &ptl);
+   pte_alloc_map_lock(closure->mm, pmd, addr, &ptl);
if (!pte)
return -ENOMEM;
 
@@ -2054,86 +2053,109 @@ static int apply_to_pte_range(struct mm_struct *mm, 
pmd_t *pmd,
token = pmd_pgtable(*pmd);
 
do {
-   err = fn(pte++, token, addr, data);
+   err = closure->ptefn(pte++, token, addr, closure);
if (err)
break;
} while (addr += PAGE_SIZE, addr != end);
 
arch_leave_lazy_mmu_mode();
 
-   if (mm != &init_mm)
+   if (closure->mm != &init_mm)
pte_unmap_unlock(pte-1, ptl);
return err;
 }
 
-static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud,
-unsigned long addr, unsigned long end,
-pte_fn_t fn, void *data)
+static int apply_to_pmd_range(struct pfn_range_apply *closure, pud_t *pud,
+ unsigned long addr, unsigned long end)
 {
pmd_t *pmd;
unsigned long next;
-   int err;
+   int err = 0;
 
BUG_ON(pud_huge(*pud));
 
-   pmd = pmd_alloc(mm, pud, addr);
+   pmd = pmd_alloc(closure->mm, pud, addr);
if (!pmd)
return -ENOMEM;
+
do {
next = pmd_addr_end(addr, end);
-   err = apply_to_pte_range(mm, pmd, addr, next, fn, data);
+   if (!closure->alloc && pmd_none_or_clear_bad(pmd))
+   continue;
+   err = apply_to_pte_range(closure, pmd, addr, next);
if (err)
break;
} while (pmd++, addr = next, addr != end);
return err;
 }
 
-static int apply_to_pud_range(struct mm_struct *mm, p4d_t *p4d,
-unsigned long addr, unsigned long end,
-pte_fn_t fn, void *data)
+static int apply_to_pud_range(struct pfn_range_apply *closure, p4d_t *p4d,
+  

[PATCH 2/4] drm/vmwgfx: Add debug message for layout change ioctl

2019-06-17 Thread VMware
From: Deepak Rawat 

Add debug code to check user-space layout change request.
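
(The messages are emitted through DRM_DEBUG_DRIVER, so they should show
up when the DRM_UT_DRIVER bit is set in the drm.debug module parameter.)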

Signed-off-by: Deepak Rawat 
Reviewed-by: Thomas Hellstrom 
---
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 8 
 drivers/gpu/drm/vmwgfx/vmwgfx_kms.c | 8 
 2 files changed, 16 insertions(+)

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h 
b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index 5971c0d47507..b4c8817dc267 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -1424,6 +1424,14 @@ int vmw_host_log(const char *log);
 #define VMW_DEBUG_USER(fmt, ...)  \
DRM_DEBUG_DRIVER(fmt, ##__VA_ARGS__)
 
+/**
+ * VMW_DEBUG_KMS - Debug output for kernel mode-setting
+ *
+ * This macro is for debugging vmwgfx mode-setting code.
+ */
+#define VMW_DEBUG_KMS(fmt, ...)   \
+   DRM_DEBUG_DRIVER(fmt, ##__VA_ARGS__)
+
 /* Resource dirtying - vmwgfx_page_dirty.c */
 void vmw_bo_dirty_scan(struct vmw_buffer_object *vbo);
 int vmw_bo_dirty_add(struct vmw_buffer_object *vbo);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
index b97bc8e5944b..7f9264a72e1d 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
@@ -2347,6 +2347,9 @@ int vmw_kms_update_layout_ioctl(struct drm_device *dev, 
void *data,
 
if (!arg->num_outputs) {
struct drm_rect def_rect = {0, 0, 800, 600};
+   VMW_DEBUG_KMS("Default layout x1 = %d y1 = %d x2 = %d y2 = %d\n",
+ def_rect.x1, def_rect.y1,
+ def_rect.x2, def_rect.y2);
vmw_du_update_layout(dev_priv, 1, &def_rect);
return 0;
}
@@ -2367,6 +2370,7 @@ int vmw_kms_update_layout_ioctl(struct drm_device *dev, 
void *data,
 
drm_rects = (struct drm_rect *)rects;
 
+   VMW_DEBUG_KMS("Layout count = %u\n", arg->num_outputs);
for (i = 0; i < arg->num_outputs; i++) {
struct drm_vmw_rect curr_rect;
 
@@ -2383,6 +2387,10 @@ int vmw_kms_update_layout_ioctl(struct drm_device *dev, 
void *data,
drm_rects[i].x2 = curr_rect.x + curr_rect.w;
drm_rects[i].y2 = curr_rect.y + curr_rect.h;
 
+   VMW_DEBUG_KMS("  x1 = %d y1 = %d x2 = %d y2 = %d\n",
+ drm_rects[i].x1, drm_rects[i].y1,
+ drm_rects[i].x2, drm_rects[i].y2);
+
/*
 * Currently this check is limiting the topology within
 * mode_config->max (which actually is max texture size
-- 
2.20.1


[PATCH 1/4] drm/vmwgfx: Assign eviction priorities to resources

2019-06-17 Thread VMware
From: Thomas Hellstrom 

TTM provides a means to assign eviction priorities to buffer objects. This
means that all buffer objects with a lower priority will be evicted first
on memory pressure.
Use this to make sure surfaces, and in particular non-dirty surfaces, are
evicted first. Evicting shaders, cotables and contexts in particular implies
a significant performance hit on vmwgfx, so make sure these resources are
evicted last.
Some buffer objects are sub-allocated in user-space, which means we can have
many resources attached to a single buffer object or resource. In that case
the buffer object is given the highest priority of the attached resources.
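
Propagating the priority could look roughly like the sketch below. The
helper is hypothetical and only illustrates the "highest priority of the
attached resources" rule described above:

	static void vmw_bo_prio_from_resources(struct vmw_buffer_object *vbo)
	{
		struct vmw_resource *res;
		u32 prio = 0;

		list_for_each_entry(res, &vbo->res_list, mob_head)
			prio = max(prio, res->used_prio);

		vbo->base.priority = prio;
	}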

Signed-off-by: Thomas Hellstrom 
Reviewed-by: Deepak Rawat 
---
 drivers/gpu/drm/vmwgfx/vmwgfx_bo.c|  2 +
 drivers/gpu/drm/vmwgfx/vmwgfx_context.c   |  4 ++
 drivers/gpu/drm/vmwgfx/vmwgfx_cotable.c   | 13 ++--
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.h   | 72 +++
 drivers/gpu/drm/vmwgfx/vmwgfx_resource.c  | 56 +++
 drivers/gpu/drm/vmwgfx/vmwgfx_resource_priv.h |  2 +
 drivers/gpu/drm/vmwgfx/vmwgfx_shader.c|  8 ++-
 drivers/gpu/drm/vmwgfx/vmwgfx_surface.c   |  4 ++
 8 files changed, 141 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
index 5d5c2bce01f3..c0829d50eecc 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
@@ -510,6 +510,8 @@ int vmw_bo_init(struct vmw_private *dev_priv,
 
acc_size = vmw_bo_acc_size(dev_priv, size, user);
memset(vmw_bo, 0, sizeof(*vmw_bo));
+   BUILD_BUG_ON(TTM_MAX_BO_PRIORITY <= 3);
+   vmw_bo->base.priority = 3;
 
INIT_LIST_HEAD(&vmw_bo->res_list);
 
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_context.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_context.c
index 63f111068a44..a56c9d802382 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_context.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_context.c
@@ -88,6 +88,8 @@ static const struct vmw_res_func vmw_gb_context_func = {
.res_type = vmw_res_context,
.needs_backup = true,
.may_evict = true,
+   .prio = 3,
+   .dirty_prio = 3,
.type_name = "guest backed contexts",
.backup_placement = &vmw_mob_placement,
.create = vmw_gb_context_create,
@@ -100,6 +102,8 @@ static const struct vmw_res_func vmw_dx_context_func = {
.res_type = vmw_res_dx_context,
.needs_backup = true,
.may_evict = true,
+   .prio = 3,
+   .dirty_prio = 3,
.type_name = "dx contexts",
.backup_placement = &vmw_mob_placement,
.create = vmw_dx_context_create,
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_cotable.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_cotable.c
index b4f6e1217c9d..8c699cb2565b 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_cotable.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_cotable.c
@@ -116,6 +116,8 @@ static const struct vmw_res_func vmw_cotable_func = {
.res_type = vmw_res_cotable,
.needs_backup = true,
.may_evict = true,
+   .prio = 3,
+   .dirty_prio = 3,
.type_name = "context guest backed object tables",
.backup_placement = &vmw_mob_placement,
.create = vmw_cotable_create,
@@ -307,7 +309,7 @@ static int vmw_cotable_unbind(struct vmw_resource *res,
struct ttm_buffer_object *bo = val_buf->bo;
struct vmw_fence_obj *fence;
 
-   if (list_empty(&res->mob_head))
+   if (!vmw_resource_mob_attached(res))
return 0;
 
WARN_ON_ONCE(bo->mem.mem_type != VMW_PL_MOB);
@@ -453,6 +455,7 @@ static int vmw_cotable_resize(struct vmw_resource *res, 
size_t new_size)
goto out_wait;
}
 
+   vmw_resource_mob_detach(res);
res->backup = buf;
res->backup_size = new_size;
vcotbl->size_read_back = cur_size_read_back;
@@ -467,12 +470,12 @@ static int vmw_cotable_resize(struct vmw_resource *res, 
size_t new_size)
res->backup = old_buf;
res->backup_size = old_size;
vcotbl->size_read_back = old_size_read_back;
+   vmw_resource_mob_attach(res);
goto out_wait;
}
 
+   vmw_resource_mob_attach(res);
/* Let go of the old mob. */
-   list_del(&res->mob_head);
-   list_add_tail(&res->mob_head, &buf->res_list);
vmw_bo_unreference(&old_buf);
res->id = vcotbl->type;
 
@@ -496,7 +499,7 @@ static int vmw_cotable_resize(struct vmw_resource *res, 
size_t new_size)
  * is called before bind() in the validation sequence is instead used for two
  * things.
  * 1) Unscrub the cotable if it is scrubbed and still attached to a backup
- *buffer, that is, if @res->mob_head is non-empty.
+ *buffer.
  * 2) Resize the cotable if needed.
  */
 static int vmw_cotable_create(struct vmw_resource *res)
@@ -512,7 +515,7 @@ static int vmw_cotable_create(struct vmw_resource *res)
new_size *= 2;
 
if

[PATCH 3/4] drm/vmwgfx: Use VMW_DEBUG_KMS for vmwgfx mode-setting user errors

2019-06-17 Thread VMware
From: Deepak Rawat 

For errors during layout change ioctl use VMW_DEBUG_KMS instead of
DRM_ERROR.

Signed-off-by: Deepak Rawat 
Reviewed-by: Thomas Hellstrom 
---
 drivers/gpu/drm/vmwgfx/vmwgfx_kms.c | 15 ++-
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
index 7f9264a72e1d..e7222fa2cfdf 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
@@ -1462,7 +1462,7 @@ static int vmw_kms_check_display_memory(struct drm_device 
*dev,
if (dev_priv->active_display_unit == vmw_du_screen_target &&
(drm_rect_width(&rects[i]) > dev_priv->stdu_max_width ||
 drm_rect_height(&rects[i]) > dev_priv->stdu_max_height)) {
-   DRM_ERROR("Screen size not supported.\n");
+   VMW_DEBUG_KMS("Screen size not supported.\n");
return -EINVAL;
}
 
@@ -1486,7 +1486,7 @@ static int vmw_kms_check_display_memory(struct drm_device 
*dev,
 * limit on primary bounding box
 */
if (pixel_mem > dev_priv->prim_bb_mem) {
-   DRM_ERROR("Combined output size too large.\n");
+   VMW_DEBUG_KMS("Combined output size too large.\n");
return -EINVAL;
}
 
@@ -1496,7 +1496,7 @@ static int vmw_kms_check_display_memory(struct drm_device 
*dev,
bb_mem = (u64) bounding_box.x2 * bounding_box.y2 * 4;
 
if (bb_mem > dev_priv->prim_bb_mem) {
-   DRM_ERROR("Topology is beyond supported limits.\n");
+   VMW_DEBUG_KMS("Topology is beyond supported limits.\n");
return -EINVAL;
}
}
@@ -1645,6 +1645,7 @@ static int vmw_kms_check_topology(struct drm_device *dev,
struct vmw_connector_state *vmw_conn_state;
 
if (!du->pref_active && new_crtc_state->enable) {
+   VMW_DEBUG_KMS("Enabling a disabled display unit\n");
ret = -EINVAL;
goto clean;
}
@@ -1701,8 +1702,10 @@ vmw_kms_atomic_check_modeset(struct drm_device *dev,
return ret;
 
ret = vmw_kms_check_implicit(dev, state);
-   if (ret)
+   if (ret) {
+   VMW_DEBUG_KMS("Invalid implicit state\n");
return ret;
+   }
 
if (!state->allow_modeset)
return ret;
@@ -2401,7 +2404,9 @@ int vmw_kms_update_layout_ioctl(struct drm_device *dev, 
void *data,
if (drm_rects[i].x1 < 0 ||  drm_rects[i].y1 < 0 ||
drm_rects[i].x2 > mode_config->max_width ||
drm_rects[i].y2 > mode_config->max_height) {
-   DRM_ERROR("Invalid GUI layout.\n");
+   VMW_DEBUG_KMS("Invalid layout %d %d %d %d\n",
+ drm_rects[i].x1, drm_rects[i].y1,
+ drm_rects[i].x2, drm_rects[i].y2);
ret = -EINVAL;
goto out_free;
}
-- 
2.20.1


[PATCH 4/4] drm/vmwgfx: Kill unneeded legacy security features

2019-06-17 Thread VMware
From: Thomas Hellstrom 

At one point, the GPU command verifier and user-space handle manager
couldn't properly protect GPU clients from accessing each other's data.
Instead there was an elaborate mechanism to make sure only the active
master's primary clients could render. The other clients were either
put to sleep or even killed (if the master had exited). VRAM was
evicted on master switch. With the advent of render-node functionality,
we relaxed the VRAM eviction, but the other mechanisms stayed in place.

Now that the GPU command verifier and ttm object manager properly
isolate primary clients from different master realms, we can remove the
master switch related code and drop those legacy features.

Signed-off-by: Thomas Hellstrom 
Reviewed-by: Brian Paul 
Reviewed-by: Deepak Rawat 
---
 drivers/gpu/drm/vmwgfx/ttm_lock.c   | 100 ---
 drivers/gpu/drm/vmwgfx/ttm_lock.h   |  30 -
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 162 +---
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.h |  16 +--
 drivers/gpu/drm/vmwgfx/vmwgfx_surface.c |   6 -
 5 files changed, 3 insertions(+), 311 deletions(-)

diff --git a/drivers/gpu/drm/vmwgfx/ttm_lock.c 
b/drivers/gpu/drm/vmwgfx/ttm_lock.c
index 16b2083cb9d4..5971c72e6d10 100644
--- a/drivers/gpu/drm/vmwgfx/ttm_lock.c
+++ b/drivers/gpu/drm/vmwgfx/ttm_lock.c
@@ -29,7 +29,6 @@
  * Authors: Thomas Hellstrom 
  */
 
-#include 
 #include 
 #include 
 #include 
@@ -49,8 +48,6 @@ void ttm_lock_init(struct ttm_lock *lock)
init_waitqueue_head(&lock->queue);
lock->rw = 0;
lock->flags = 0;
-   lock->kill_takers = false;
-   lock->signal = SIGKILL;
 }
 
 void ttm_read_unlock(struct ttm_lock *lock)
@@ -66,11 +63,6 @@ static bool __ttm_read_lock(struct ttm_lock *lock)
bool locked = false;
 
spin_lock(&lock->lock);
-   if (unlikely(lock->kill_takers)) {
-   send_sig(lock->signal, current, 0);
-   spin_unlock(&lock->lock);
-   return false;
-   }
if (lock->rw >= 0 && lock->flags == 0) {
++lock->rw;
locked = true;
@@ -98,11 +90,6 @@ static bool __ttm_read_trylock(struct ttm_lock *lock, bool 
*locked)
*locked = false;
 
spin_lock(&lock->lock);
-   if (unlikely(lock->kill_takers)) {
-   send_sig(lock->signal, current, 0);
-   spin_unlock(&lock->lock);
-   return false;
-   }
if (lock->rw >= 0 && lock->flags == 0) {
++lock->rw;
block = false;
@@ -147,11 +134,6 @@ static bool __ttm_write_lock(struct ttm_lock *lock)
bool locked = false;
 
spin_lock(&lock->lock);
-   if (unlikely(lock->kill_takers)) {
-   send_sig(lock->signal, current, 0);
-   spin_unlock(&lock->lock);
-   return false;
-   }
if (lock->rw == 0 && ((lock->flags & ~TTM_WRITE_LOCK_PENDING) == 0)) {
lock->rw = -1;
lock->flags &= ~TTM_WRITE_LOCK_PENDING;
@@ -182,88 +164,6 @@ int ttm_write_lock(struct ttm_lock *lock, bool 
interruptible)
return ret;
 }
 
-static int __ttm_vt_unlock(struct ttm_lock *lock)
-{
-   int ret = 0;
-
-   spin_lock(&lock->lock);
-   if (unlikely(!(lock->flags & TTM_VT_LOCK)))
-   ret = -EINVAL;
-   lock->flags &= ~TTM_VT_LOCK;
-   wake_up_all(&lock->queue);
-   spin_unlock(&lock->lock);
-
-   return ret;
-}
-
-static void ttm_vt_lock_remove(struct ttm_base_object **p_base)
-{
-   struct ttm_base_object *base = *p_base;
-   struct ttm_lock *lock = container_of(base, struct ttm_lock, base);
-   int ret;
-
-   *p_base = NULL;
-   ret = __ttm_vt_unlock(lock);
-   BUG_ON(ret != 0);
-}
-
-static bool __ttm_vt_lock(struct ttm_lock *lock)
-{
-   bool locked = false;
-
-   spin_lock(&lock->lock);
-   if (lock->rw == 0) {
-   lock->flags &= ~TTM_VT_LOCK_PENDING;
-   lock->flags |= TTM_VT_LOCK;
-   locked = true;
-   } else {
-   lock->flags |= TTM_VT_LOCK_PENDING;
-   }
-   spin_unlock(&lock->lock);
-   return locked;
-}
-
-int ttm_vt_lock(struct ttm_lock *lock,
-   bool interruptible,
-   struct ttm_object_file *tfile)
-{
-   int ret = 0;
-
-   if (interruptible) {
-   ret = wait_event_interruptible(lock->queue,
-  __ttm_vt_lock(lock));
-   if (unlikely(ret != 0)) {
-   spin_lock(&lock->lock);
-   lock->flags &= ~TTM_VT_LOCK_PENDING;
-   wake_up_all(&lock->queue);
-   spin_unlock(&lock->lock);
-   return ret;
-   }
-   } else
-   wait_event(lock->queue, __ttm_vt_lock(lock));
-
-   /*
-* Add a base-object, the destructor of which will
-* make sure the lock is released i

[git pull] vmwgfx-fixes-5.2

2019-06-18 Thread VMware
Dave, Daniel,

A couple of fixes for vmwgfx. Two fixes for a DMA sg-list debug warning
message. These are not cc'd stable since there is no evidence of actual
breakage.
One fix for the high-bandwidth backdoor port, which is cc'd stable due to
upcoming hardware on which the code would otherwise break.

The following changes since commit 5ed7f4b5eca11c3c69e7c8b53e4321812bc1ee1e:

  drm/vmwgfx: integer underflow in vmw_cmd_dx_set_shader() leading to an 
invalid read (2019-05-21 10:23:10 +0200)

are available in the Git repository at:

  git://people.freedesktop.org/~thomash/linux vmwgfx-fixes-5.2

for you to fetch changes up to 39916897cd815a0ee07ba1f6820cf88a63e459fc:

  drm/vmwgfx: fix a warning due to missing dma_parms (2019-06-11 17:00:53 +0200)


Qian Cai (1):
  drm/vmwgfx: fix a warning due to missing dma_parms

Thomas Hellstrom (2):
  drm/vmwgfx: Use the backdoor port if the HB port is not available
  drm/vmwgfx: Honor the sg list segment size limitation

 drivers/gpu/drm/vmwgfx/vmwgfx_drv.c|   3 +
 drivers/gpu/drm/vmwgfx/vmwgfx_msg.c| 146 +++--
 drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c |  10 +-
 3 files changed, 125 insertions(+), 34 deletions(-)

[git pull] vmwgfx-next

2019-06-19 Thread VMware
Dave, Daniel

- The coherent memory changes including mm changes.
- Some vmwgfx debug fixes.
- Removal of vmwgfx legacy security checks.

The following changes since commit 561564bea3248293398dc32ec36da40fb71faed0:

  Merge tag 'omapdrm-5.3' of 
git://git.kernel.org/pub/scm/linux/kernel/git/tomba/linux into drm-next 
(2019-06-11 13:29:33 +0200)

are available in the Git repository at:

  git://people.freedesktop.org/~thomash/linux/ vmwgfx-next

for you to fetch changes up to 9bbfda544ed79e8e9abde27bfe2c85428d582e7b:

  drm/vmwgfx: Kill unneeded legacy security features (2019-06-18 15:22:48 +0200)


Deepak Rawat (2):
  drm/vmwgfx: Add debug message for layout change ioctl
  drm/vmwgfx: Use VMW_DEBUG_KMS for vmwgfx mode-setting user errors

Thomas Hellstrom (11):
  drm/vmwgfx: Assign eviction priorities to resources
  mm: Allow the [page|pfn]_mkwrite callbacks to drop the mmap_sem
  mm: Add an apply_to_pfn_range interface
  mm: Add write-protect and clean utilities for address space ranges
  drm/ttm: Allow the driver to provide the ttm struct vm_operations_struct
  drm/ttm: TTM fault handler helpers
  drm/vmwgfx: Implement an infrastructure for write-coherent resources
  drm/vmwgfx: Use an RBtree instead of linked list for MOB resources
  drm/vmwgfx: Implement an infrastructure for read-coherent resources
  drm/vmwgfx: Add surface dirty-tracking callbacks
  drm/vmwgfx: Kill unneeded legacy security features

 MAINTAINERS|   1 +
 drivers/gpu/drm/ttm/ttm_bo.c   |   1 +
 drivers/gpu/drm/ttm/ttm_bo_vm.c| 169 +---
 drivers/gpu/drm/vmwgfx/Kconfig |   1 +
 drivers/gpu/drm/vmwgfx/Makefile|   2 +-
 .../drm/vmwgfx/device_include/svga3d_surfacedefs.h | 233 +-
 drivers/gpu/drm/vmwgfx/ttm_lock.c  | 100 -
 drivers/gpu/drm/vmwgfx/ttm_lock.h  |  30 --
 drivers/gpu/drm/vmwgfx/vmwgfx_bo.c |  12 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_context.c|   4 +
 drivers/gpu/drm/vmwgfx/vmwgfx_cotable.c|  13 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.c| 167 +---
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.h| 139 --
 drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c|   1 -
 drivers/gpu/drm/vmwgfx/vmwgfx_kms.c|  23 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c | 472 +
 drivers/gpu/drm/vmwgfx/vmwgfx_resource.c   | 245 +--
 drivers/gpu/drm/vmwgfx/vmwgfx_resource_priv.h  |  15 +
 drivers/gpu/drm/vmwgfx/vmwgfx_shader.c |   8 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_surface.c| 405 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_validation.c |  74 +++-
 drivers/gpu/drm/vmwgfx/vmwgfx_validation.h |  16 +-
 include/drm/ttm/ttm_bo_api.h   |  10 +
 include/drm/ttm/ttm_bo_driver.h|   6 +
 include/linux/mm.h |  19 +-
 include/uapi/drm/vmwgfx_drm.h  |   4 +-
 mm/Kconfig |   3 +
 mm/Makefile|   1 +
 mm/as_dirty_helpers.c  | 300 +
 mm/memory.c| 145 +--
 30 files changed, 2136 insertions(+), 483 deletions(-)
 create mode 100644 drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c
 create mode 100644 mm/as_dirty_helpers.c

Re: [PATCH v2 00/18] drm/ttm: make ttm bo a gem bo subclass

2019-06-21 Thread VMware



On 6/21/19 1:57 PM, Gerd Hoffmann wrote:

Aargh. Please don't do this. Multiple reasons:

1) I think it's bad to dump all buffer object functionality we can 
possibly think of in a single struct and force that on all (well at 
least most) users. It's better to isolate functionality in structs, have 
utility functions for those and let the drivers derive their buffer 
objects from whatever functionality they actually need.
2) vmwgfx is not using gem and we don't want to carry that extra payload 
in the buffer object.
3) TTM historically hasn't been using the various drm layers except for 
later when common helpers have been used, (like the vma manager and the 
cache utilities). It's desirable to keep that layer distinction. (which 
is really what I'm saying in 1.)


Now if more and more functionality that originated in TTM is moving into
GEM we need to find a better way to do that without duplicating
functionality. I suggest adding pointers in the TTM structs and
defaulting those pointers to the member in the TTM struct, or optionally
to the member in the GEM struct. If we need to migrate those members out
of the TTM struct, vmwgfx would have to provide them in its own buffer
class.
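
Roughly what I have in mind, with hypothetical names, is something like:

	struct ttm_buffer_object {
		/* ... */
		struct drm_vma_offset_node vma_node_storage;
		/*
		 * Defaults to &vma_node_storage; a GEM-based driver could
		 * instead point this at the node embedded in its GEM object.
		 */
		struct drm_vma_offset_node *vma_node;
	};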


NAK from the vmwgfx side.

Thanks,
Thomas



Re: drm pull for v5.3-rc1

2019-07-15 Thread VMware

Hi, All.

On 7/15/19 8:00 PM, Linus Torvalds wrote:

On Mon, Jul 15, 2019 at 10:37 AM Linus Torvalds
 wrote:

I'm not pulling this. Why did you merge it into your tree, when
apparently you were aware of how questionable it is judging by the drm
pull request.

Looking at some of the fallout, I also see that you then added that
"adjust apply_to_pfn_range interface for dropped token" patch that
seems to be for easier merging of this all.

But you remove the 'token' entirely in one place, and in another you
keep it and just say "whatever, it's unused, pass in NULL". WHAA?

As part of looking at this all, I also note that some of this is also
very non-kernely.

The whole thing with trying to implement a "closure" in C is simply
not how we do things in the kernel (although I've admittedly seen
signs of it in some drivers).

If this should be done at all (and that's questionable), at least do
it in the canonical kernel way: pass in a separate "actor" function
pointer and an argument block, don't try to mix function pointers and
argument data and call it a "closure".

We try to keep data and functions separate. It's not even for security
concerns (although those have caused some splits in the past - avoid
putting function pointers in structures that you then can't mark
read-only!), it's a more generic issue of just keeping arguments as
arguments - even if you then make a structure of them in order to not
make the calling convention very complicated.

(Yes, we do have the pattern of sometimes mixing function pointers
with "describing data", ie the "struct file_operations" structure
isn't _just_ actual function pointers, it also contains the module
owner, for example. But those aren't about mixing function pointers
with their arguments, it's about basically "describing" an object
interface with more than just the operation pointers).

So some of this code is stuff that I would have let go if it was in
some individual driver ("Closures? C doesn't have closures! But
whatever - that driver writer came from some place that taught lambda
calculus before teaching C").

But in the core mm code, I want reviews. And I want the code to follow
normal kernel conventions.


Sorry for creating this mess, I guess I need to take another spin at 
this, but first I'd like to straighten out a few details:


- I've never had any kernel code more reviewed than this. It's been out 
on LKML and mm-list and maintainers I think 8 times including the RFC. 
The last time I was explicitly asking if anybody had any objections 
because I wanted to get it merged. It's not an internally-reviewed-only 
thing. There have been a number of people looking at the code and 
leaving comments and requesting fixes including Ralph Campbell, Jerome 
Glisse, Souptick Joarder, Nadav Amit and Christoph Hellwig. Perhaps I 
should have been more explicit in requesting R-Bs after fixing up all 
review comments, but I didn't. None of them had any issues similar to 
the ones you describe above.


- The combined callback / argument struct: It was strongly inspired by 
the struct mm_walk (mm.h), the page walk code being quite similar in 
functionality. "Closure" is perhaps a bad name; it originates in X server code.


Thanks,
Thomas



Linus

[PATCH v7 4/8] mm: Add write-protect and clean utilities for address space ranges

2019-11-07 Thread VMware
From: Thomas Hellstrom 

Add two utilities to 1) write-protect and 2) clean all ptes pointing into
a range of an address space.
The utilities are intended to aid in tracking dirty pages (either
driver-allocated system memory or pci device memory).
The write-protect utility should be used in conjunction with
page_mkwrite() and pfn_mkwrite() to trigger write page-faults on page
accesses. Typically one would want to use this on sparse accesses into
large memory regions. The clean utility should be used to utilize
hardware dirtying functionality and avoid the overhead of page-faults,
typically on large accesses into small memory regions.
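
As a usage sketch restricted to the two signatures added below (the
track_range() wrapper itself is made up for the example):

	static void track_range(struct address_space *mapping, pgoff_t first,
				pgoff_t nr, unsigned long *bitmap,
				pgoff_t bitmap_pgoff)
	{
		pgoff_t start = 0, end = nr;

		/* Arm write page-faults on [first, first + nr): */
		wp_shared_mapping_range(mapping, first, nr);

		/*
		 * Or pick up hardware dirty bits into @bitmap and clean the
		 * ptes; @start and @end return the extent of the dirty range.
		 */
		clean_record_shared_mapping_range(mapping, first, nr,
						  bitmap_pgoff, bitmap,
						  &start, &end);
	}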

Cc: Andrew Morton 
Cc: Matthew Wilcox 
Cc: Will Deacon 
Cc: Peter Zijlstra 
Cc: Rik van Riel 
Cc: Minchan Kim 
Cc: Michal Hocko 
Cc: Huang Ying 
Cc: Jérôme Glisse 
Cc: Kirill A. Shutemov 
Signed-off-by: Thomas Hellstrom 
Acked-by: Andrew Morton 
---
 include/linux/mm.h |  13 +-
 mm/Kconfig |   3 +
 mm/Makefile|   1 +
 mm/mapping_dirty_helpers.c | 315 +
 4 files changed, 331 insertions(+), 1 deletion(-)
 create mode 100644 mm/mapping_dirty_helpers.c

diff --git a/include/linux/mm.h b/include/linux/mm.h
index cc292273e6ba..4bc93477375e 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2637,7 +2637,6 @@ typedef int (*pte_fn_t)(pte_t *pte, unsigned long addr, 
void *data);
 extern int apply_to_page_range(struct mm_struct *mm, unsigned long address,
   unsigned long size, pte_fn_t fn, void *data);
 
-
 #ifdef CONFIG_PAGE_POISONING
 extern bool page_poisoning_enabled(void);
 extern void kernel_poison_pages(struct page *page, int numpages, int enable);
@@ -2878,5 +2877,17 @@ static inline int pages_identical(struct page *page1, 
struct page *page2)
return !memcmp_pages(page1, page2);
 }
 
+#ifdef CONFIG_MAPPING_DIRTY_HELPERS
+unsigned long clean_record_shared_mapping_range(struct address_space *mapping,
+   pgoff_t first_index, pgoff_t nr,
+   pgoff_t bitmap_pgoff,
+   unsigned long *bitmap,
+   pgoff_t *start,
+   pgoff_t *end);
+
+unsigned long wp_shared_mapping_range(struct address_space *mapping,
+ pgoff_t first_index, pgoff_t nr);
+#endif
+
 #endif /* __KERNEL__ */
 #endif /* _LINUX_MM_H */
diff --git a/mm/Kconfig b/mm/Kconfig
index a5dae9a7eb51..550f7aceb679 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -736,4 +736,7 @@ config ARCH_HAS_PTE_SPECIAL
 config ARCH_HAS_HUGEPD
bool
 
+config MAPPING_DIRTY_HELPERS
+bool
+
 endmenu
diff --git a/mm/Makefile b/mm/Makefile
index d996846697ef..1937cc251883 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -107,3 +107,4 @@ obj-$(CONFIG_PERCPU_STATS) += percpu-stats.o
 obj-$(CONFIG_ZONE_DEVICE) += memremap.o
 obj-$(CONFIG_HMM_MIRROR) += hmm.o
 obj-$(CONFIG_MEMFD_CREATE) += memfd.o
+obj-$(CONFIG_MAPPING_DIRTY_HELPERS) += mapping_dirty_helpers.o
diff --git a/mm/mapping_dirty_helpers.c b/mm/mapping_dirty_helpers.c
new file mode 100644
index ..71070dda9643
--- /dev/null
+++ b/mm/mapping_dirty_helpers.c
@@ -0,0 +1,315 @@
+// SPDX-License-Identifier: GPL-2.0
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+/**
+ * struct wp_walk - Private struct for pagetable walk callbacks
+ * @range: Range for mmu notifiers
+ * @tlbflush_start: Address of first modified pte
+ * @tlbflush_end: Address of last modified pte + 1
+ * @total: Total number of modified ptes
+ */
+struct wp_walk {
+   struct mmu_notifier_range range;
+   unsigned long tlbflush_start;
+   unsigned long tlbflush_end;
+   unsigned long total;
+};
+
+/**
+ * wp_pte - Write-protect a pte
+ * @pte: Pointer to the pte
+ * @addr: The virtual page address
+ * @walk: pagetable walk callback argument
+ *
+ * The function write-protects a pte and records the range in
+ * virtual address space of touched ptes for efficient range TLB flushes.
+ */
+static int wp_pte(pte_t *pte, unsigned long addr, unsigned long end,
+ struct mm_walk *walk)
+{
+   struct wp_walk *wpwalk = walk->private;
+   pte_t ptent = *pte;
+
+   if (pte_write(ptent)) {
+   pte_t old_pte = ptep_modify_prot_start(walk->vma, addr, pte);
+
+   ptent = pte_wrprotect(old_pte);
+   ptep_modify_prot_commit(walk->vma, addr, pte, old_pte, ptent);
+   wpwalk->total++;
+   wpwalk->tlbflush_start = min(wpwalk->tlbflush_start, addr);
+   wpwalk->tlbflush_end = max(wpwalk->tlbflush_end,
+  addr + PAGE_SIZE);
+   }
+
+   return 0;
+}
+
+/**
+ * struct clean_walk - Private struct for the clean_record_pte function.
+ * @base: struct wp_walk we derive from
+ * @bitmap_pgoff: 

[PATCH v7 1/8] mm: Remove BUG_ON mmap_sem not held from xxx_trans_huge_lock()

2019-11-07 Thread VMware
From: Thomas Hellstrom 

The caller needs to make sure that the vma is not torn down during the
lock operation and can also use the i_mmap_rwsem for file-backed vmas.
Remove the BUG_ON. We could, as an alternative, add a test that either
vma->vm_mm->mmap_sem or vma->vm_file->f_mapping->i_mmap_rwsem are held.
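
Such a test could look roughly like this sketch (not part of this patch):

	#ifdef CONFIG_DEBUG_VM
		if (vma->vm_file)
			VM_WARN_ON_ONCE(!rwsem_is_locked(&vma->vm_mm->mmap_sem) &&
					!rwsem_is_locked(&vma->vm_file->f_mapping->i_mmap_rwsem));
		else
			VM_WARN_ON_ONCE(!rwsem_is_locked(&vma->vm_mm->mmap_sem));
	#endif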

Cc: Andrew Morton 
Cc: Matthew Wilcox 
Cc: Will Deacon 
Cc: Peter Zijlstra 
Cc: Rik van Riel 
Cc: Minchan Kim 
Cc: Michal Hocko 
Cc: Huang Ying 
Cc: Jérôme Glisse 
Cc: Kirill A. Shutemov 
Signed-off-by: Thomas Hellstrom 
Acked-by: Kirill A. Shutemov 
---
 include/linux/huge_mm.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 93d5cf0bc716..0b84e13e88e2 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -216,7 +216,6 @@ static inline int is_swap_pmd(pmd_t pmd)
 static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd,
struct vm_area_struct *vma)
 {
-   VM_BUG_ON_VMA(!rwsem_is_locked(&vma->vm_mm->mmap_sem), vma);
if (is_swap_pmd(*pmd) || pmd_trans_huge(*pmd) || pmd_devmap(*pmd))
return __pmd_trans_huge_lock(pmd, vma);
else
@@ -225,7 +224,6 @@ static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd,
 static inline spinlock_t *pud_trans_huge_lock(pud_t *pud,
struct vm_area_struct *vma)
 {
-   VM_BUG_ON_VMA(!rwsem_is_locked(&vma->vm_mm->mmap_sem), vma);
if (pud_trans_huge(*pud) || pud_devmap(*pud))
return __pud_trans_huge_lock(pud, vma);
else
-- 
2.20.1


[PATCH v7 3/8] mm: Add a walk_page_mapping() function to the pagewalk code

2019-11-07 Thread VMware
From: Thomas Hellstrom 

For users that want to traverse all page table entries pointing into a
region of a struct address_space mapping, introduce a walk_page_mapping()
function.

The walk_page_mapping() function will initially be used for dirty-
tracking in virtual graphics drivers.
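
A minimal usage sketch, assuming only the signatures in this patch (the
count_pte names are made up): count the ptes currently mapping a range
of an address_space.

	static int count_pte(pte_t *pte, unsigned long addr,
			     unsigned long next, struct mm_walk *walk)
	{
		unsigned long *count = walk->private;

		if (!pte_none(*pte))
			(*count)++;
		return 0;
	}

	static const struct mm_walk_ops count_ops = {
		.pte_entry = count_pte,
	};

	/* Caller, with mapping->i_mmap_rwsem held:
	 *
	 *	unsigned long count = 0;
	 *	walk_page_mapping(mapping, first_index, nr, &count_ops,
	 *			  &count);
	 */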

Cc: Andrew Morton 
Cc: Matthew Wilcox 
Cc: Will Deacon 
Cc: Peter Zijlstra 
Cc: Rik van Riel 
Cc: Minchan Kim 
Cc: Michal Hocko 
Cc: Huang Ying 
Cc: Jérôme Glisse 
Cc: Kirill A. Shutemov 
Signed-off-by: Thomas Hellstrom 
Reviewed-by: Andrew Morton 
---
 include/linux/pagewalk.h |  9 
 mm/pagewalk.c| 94 +++-
 2 files changed, 102 insertions(+), 1 deletion(-)

diff --git a/include/linux/pagewalk.h b/include/linux/pagewalk.h
index bddd9759bab9..6ec82e92c87f 100644
--- a/include/linux/pagewalk.h
+++ b/include/linux/pagewalk.h
@@ -24,6 +24,9 @@ struct mm_walk;
  * "do page table walk over the current vma", returning
  * a negative value means "abort current page table walk
  * right now" and returning 1 means "skip the current vma"
+ * @pre_vma:   if set, called before starting walk on a non-null vma.
+ * @post_vma:   if set, called after a walk on a non-null vma, provided
+ *  that @pre_vma and the vma walk succeeded.
  */
 struct mm_walk_ops {
int (*pud_entry)(pud_t *pud, unsigned long addr,
@@ -39,6 +42,9 @@ struct mm_walk_ops {
 struct mm_walk *walk);
int (*test_walk)(unsigned long addr, unsigned long next,
struct mm_walk *walk);
+   int (*pre_vma)(unsigned long start, unsigned long end,
+  struct mm_walk *walk);
+   void (*post_vma)(struct mm_walk *walk);
 };
 
 /**
@@ -62,5 +68,8 @@ int walk_page_range(struct mm_struct *mm, unsigned long start,
void *private);
 int walk_page_vma(struct vm_area_struct *vma, const struct mm_walk_ops *ops,
void *private);
+int walk_page_mapping(struct address_space *mapping, pgoff_t first_index,
+ pgoff_t nr, const struct mm_walk_ops *ops,
+ void *private);
 
 #endif /* _LINUX_PAGEWALK_H */
diff --git a/mm/pagewalk.c b/mm/pagewalk.c
index c5fa42cab14f..ea0b9e606ad1 100644
--- a/mm/pagewalk.c
+++ b/mm/pagewalk.c
@@ -254,13 +254,23 @@ static int __walk_page_range(unsigned long start, 
unsigned long end,
 {
int err = 0;
struct vm_area_struct *vma = walk->vma;
+   const struct mm_walk_ops *ops = walk->ops;
+
+   if (vma && ops->pre_vma) {
+   err = ops->pre_vma(start, end, walk);
+   if (err)
+   return err;
+   }
 
if (vma && is_vm_hugetlb_page(vma)) {
-   if (walk->ops->hugetlb_entry)
+   if (ops->hugetlb_entry)
err = walk_hugetlb_range(start, end, walk);
} else
err = walk_pgd_range(start, end, walk);
 
+   if (vma && ops->post_vma)
+   ops->post_vma(walk);
+
return err;
 }
 
@@ -291,6 +301,11 @@ static int __walk_page_range(unsigned long start, unsigned 
long end,
  * its vm_flags. walk_page_test() and @ops->test_walk() are used for this
  * purpose.
  *
+ * If operations need to be staged before and committed after a vma is walked,
+ * there are two callbacks, pre_vma() and post_vma(). Note that post_vma(),
+ * since it is intended to handle commit-type operations, can't return any
+ * errors.
+ *
  * struct mm_walk keeps current values of some common data like vma and pmd,
  * which are useful for the access from callbacks. If you want to pass some
  * caller-specific data to callbacks, @private should be helpful.
@@ -377,3 +392,80 @@ int walk_page_vma(struct vm_area_struct *vma, const struct 
mm_walk_ops *ops,
return err;
return __walk_page_range(vma->vm_start, vma->vm_end, &walk);
 }
+
+/**
+ * walk_page_mapping - walk all memory areas mapped into a struct 
address_space.
+ * @mapping: Pointer to the struct address_space
+ * @first_index: First page offset in the address_space
+ * @nr: Number of incremental page offsets to cover
+ * @ops:   operation to call during the walk
+ * @private:   private data for callbacks' usage
+ *
+ * This function walks all memory areas mapped into a struct address_space.
+ * The walk is limited to only the given page-size index range, but if
+ * the index boundaries cross a huge page-table entry, that entry will be
+ * included.
+ *
+ * Also see walk_page_range() for additional information.
+ *
+ * Locking:
+ *   This function can't require that the struct mm_struct::mmap_sem is held,
+ *   since @mapping may be mapped by multiple processes. Instead
+ *   @mapping->i_mmap_rwsem must be held. This might have implications in the
+ *   callbacks, and it's up to the caller to ensure that the
+ *   struct mm_struct::mmap_sem is not needed.
+ *
+ *   

[PATCH v7 2/8] mm: pagewalk: Take the pagetable lock in walk_pte_range()

2019-11-07 Thread VMware
From: Thomas Hellstrom 

Without the lock, anybody modifying a pte from within this function might
have it concurrently modified by someone else.

Cc: Matthew Wilcox 
Cc: Will Deacon 
Cc: Peter Zijlstra 
Cc: Rik van Riel 
Cc: Minchan Kim 
Cc: Michal Hocko 
Cc: Huang Ying 
Cc: Jérôme Glisse 
Cc: Kirill A. Shutemov 
Suggested-by: Linus Torvalds 
Signed-off-by: Thomas Hellstrom 
Acked-by: Kirill A. Shutemov 
---
 mm/pagewalk.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/mm/pagewalk.c b/mm/pagewalk.c
index d48c2a986ea3..c5fa42cab14f 100644
--- a/mm/pagewalk.c
+++ b/mm/pagewalk.c
@@ -10,8 +10,9 @@ static int walk_pte_range(pmd_t *pmd, unsigned long addr, 
unsigned long end,
pte_t *pte;
int err = 0;
const struct mm_walk_ops *ops = walk->ops;
+   spinlock_t *ptl;
 
-   pte = pte_offset_map(pmd, addr);
+   pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
for (;;) {
err = ops->pte_entry(pte, addr, addr + PAGE_SIZE, walk);
if (err)
@@ -22,7 +23,7 @@ static int walk_pte_range(pmd_t *pmd, unsigned long addr, 
unsigned long end,
pte++;
}
 
-   pte_unmap(pte);
+   pte_unmap_unlock(pte, ptl);
return err;
 }
 
-- 
2.20.1


[PATCH v7 7/8] drm/vmwgfx: Implement an infrastructure for read-coherent resources

2019-11-07 Thread VMware
From: Thomas Hellstrom 

Similar to write-coherent resources, make sure that from the user-space
point of view, GPU-rendered content is automatically available for
reading by the CPU.

Cc: Andrew Morton 
Cc: Matthew Wilcox 
Cc: Will Deacon 
Cc: Peter Zijlstra 
Cc: Rik van Riel 
Cc: Minchan Kim 
Cc: Michal Hocko 
Cc: Huang Ying 
Cc: Jérôme Glisse 
Cc: Kirill A. Shutemov 
Signed-off-by: Thomas Hellstrom 
Reviewed-by: Deepak Rawat 
---
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.h   |   7 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c|  77 -
 drivers/gpu/drm/vmwgfx/vmwgfx_resource.c  | 103 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_resource_priv.h |   2 +
 drivers/gpu/drm/vmwgfx/vmwgfx_validation.c|   3 +-
 5 files changed, 181 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h 
b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index d5fa9b72c8ff..7773952f81f8 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -684,7 +684,8 @@ extern void vmw_resource_unreference(struct vmw_resource 
**p_res);
 extern struct vmw_resource *vmw_resource_reference(struct vmw_resource *res);
 extern struct vmw_resource *
 vmw_resource_reference_unless_doomed(struct vmw_resource *res);
-extern int vmw_resource_validate(struct vmw_resource *res, bool intr);
+extern int vmw_resource_validate(struct vmw_resource *res, bool intr,
+bool dirtying);
 extern int vmw_resource_reserve(struct vmw_resource *res, bool interruptible,
bool no_backup);
 extern bool vmw_resource_needs_backup(const struct vmw_resource *res);
@@ -728,6 +729,8 @@ void vmw_resource_mob_attach(struct vmw_resource *res);
 void vmw_resource_mob_detach(struct vmw_resource *res);
 void vmw_resource_dirty_update(struct vmw_resource *res, pgoff_t start,
   pgoff_t end);
+int vmw_resources_clean(struct vmw_buffer_object *vbo, pgoff_t start,
+   pgoff_t end, pgoff_t *num_prefault);
 
 /**
  * vmw_resource_mob_attached - Whether a resource currently has a mob attached
@@ -1421,6 +1424,8 @@ int vmw_bo_dirty_add(struct vmw_buffer_object *vbo);
 void vmw_bo_dirty_transfer_to_res(struct vmw_resource *res);
 void vmw_bo_dirty_clear_res(struct vmw_resource *res);
 void vmw_bo_dirty_release(struct vmw_buffer_object *vbo);
+void vmw_bo_dirty_unmap(struct vmw_buffer_object *vbo,
+   pgoff_t start, pgoff_t end);
 vm_fault_t vmw_bo_vm_fault(struct vm_fault *vmf);
 vm_fault_t vmw_bo_vm_mkwrite(struct vm_fault *vmf);
 
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c
index 060c1e492f25..f07aa857587c 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c
@@ -155,7 +155,6 @@ static void vmw_bo_dirty_scan_mkwrite(struct 
vmw_buffer_object *vbo)
}
 }
 
-
 /**
  * vmw_bo_dirty_scan - Scan for dirty pages and add them to the dirty
  * tracking structure
@@ -173,6 +172,53 @@ void vmw_bo_dirty_scan(struct vmw_buffer_object *vbo)
vmw_bo_dirty_scan_mkwrite(vbo);
 }
 
+/**
+ * vmw_bo_dirty_pre_unmap - write-protect and pick up dirty pages before
+ * an unmap_mapping_range operation.
+ * @vbo: The buffer object,
+ * @start: First page of the range within the buffer object.
+ * @end: Last page of the range within the buffer object + 1.
+ *
+ * If we're using the _PAGETABLE scan method, we may leak dirty pages
+ * when calling unmap_mapping_range(). This function makes sure we pick
+ * up all dirty pages.
+ */
+static void vmw_bo_dirty_pre_unmap(struct vmw_buffer_object *vbo,
+  pgoff_t start, pgoff_t end)
+{
+   struct vmw_bo_dirty *dirty = vbo->dirty;
+   unsigned long offset = drm_vma_node_start(&vbo->base.base.vma_node);
+   struct address_space *mapping = vbo->base.bdev->dev_mapping;
+
+   if (dirty->method != VMW_BO_DIRTY_PAGETABLE || start >= end)
+   return;
+
+   wp_shared_mapping_range(mapping, start + offset, end - start);
+   clean_record_shared_mapping_range(mapping, start + offset,
+ end - start, offset,
+ &dirty->bitmap[0], &dirty->start,
+ &dirty->end);
+}
+
+/**
+ * vmw_bo_dirty_unmap - Clear all ptes pointing to a range within a bo
+ * @vbo: The buffer object,
+ * @start: First page of the range within the buffer object.
+ * @end: Last page of the range within the buffer object + 1.
+ *
+ * This is similar to ttm_bo_unmap_virtual_locked() except it takes a subrange.
+ */
+void vmw_bo_dirty_unmap(struct vmw_buffer_object *vbo,
+   pgoff_t start, pgoff_t end)
+{
+   unsigned long offset = drm_vma_node_start(&vbo->base.base.vma_node);
+   struct address_space *mapping = vbo->base.bdev->dev_mapping;
+
+   vmw_bo_dirty_pre_unmap(vb

[PATCH v7 6/8] drm/vmwgfx: Use an RBtree instead of linked list for MOB resources

2019-11-07 Thread VMware
From: Thomas Hellstrom 

With emulated coherent memory we need to be able to quickly look up
a resource from the MOB offset. Instead of traversing a linked list with
O(n) worst case, use an RBtree with O(log n) worst case complexity.

Cc: Andrew Morton 
Cc: Matthew Wilcox 
Cc: Will Deacon 
Cc: Peter Zijlstra 
Cc: Rik van Riel 
Cc: Minchan Kim 
Cc: Michal Hocko 
Cc: Huang Ying 
Cc: Jérôme Glisse 
Cc: Kirill A. Shutemov 
Signed-off-by: Thomas Hellstrom 
Reviewed-by: Deepak Rawat 
---
 drivers/gpu/drm/vmwgfx/vmwgfx_bo.c   |  5 ++--
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.h  | 10 +++
 drivers/gpu/drm/vmwgfx/vmwgfx_resource.c | 33 +---
 3 files changed, 32 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
index 4f6a75d42d7f..8b71bf6b58ef 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
@@ -463,6 +463,7 @@ void vmw_bo_bo_free(struct ttm_buffer_object *bo)
struct vmw_buffer_object *vmw_bo = vmw_buffer_object(bo);
 
WARN_ON(vmw_bo->dirty);
+   WARN_ON(!RB_EMPTY_ROOT(&vmw_bo->res_tree));
vmw_bo_unmap(vmw_bo);
kfree(vmw_bo);
 }
@@ -479,6 +480,7 @@ static void vmw_user_bo_destroy(struct ttm_buffer_object 
*bo)
struct vmw_buffer_object *vbo = &vmw_user_bo->vbo;
 
WARN_ON(vbo->dirty);
+   WARN_ON(!RB_EMPTY_ROOT(&vbo->res_tree));
vmw_bo_unmap(vbo);
ttm_prime_object_kfree(vmw_user_bo, prime);
 }
@@ -514,8 +516,7 @@ int vmw_bo_init(struct vmw_private *dev_priv,
memset(vmw_bo, 0, sizeof(*vmw_bo));
BUILD_BUG_ON(TTM_MAX_BO_PRIORITY <= 3);
vmw_bo->base.priority = 3;
-
-   INIT_LIST_HEAD(&vmw_bo->res_list);
+   vmw_bo->res_tree = RB_ROOT;
 
ret = ttm_bo_init(bdev, &vmw_bo->base, size,
  ttm_bo_type_device, placement,
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h 
b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index 7f2cce016687..d5fa9b72c8ff 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -100,7 +100,7 @@ struct vmw_fpriv {
 /**
  * struct vmw_buffer_object - TTM buffer object with vmwgfx additions
  * @base: The TTM buffer object
- * @res_list: List of resources using this buffer object as a backing MOB
+ * @res_tree: RB tree of resources using this buffer object as a backing MOB
  * @pin_count: pin depth
  * @cpu_writers: Number of synccpu write grabs. Protected by reservation when
  * increased. May be decreased without reservation.
@@ -111,7 +111,7 @@ struct vmw_fpriv {
  */
 struct vmw_buffer_object {
struct ttm_buffer_object base;
-   struct list_head res_list;
+   struct rb_root res_tree;
s32 pin_count;
atomic_t cpu_writers;
/* Not ref-counted.  Protected by binding_mutex */
@@ -160,8 +160,8 @@ struct vmw_res_func;
  * pin-count greater than zero. It is not on the resource LRU lists and its
  * backup buffer is pinned. Hence it can't be evicted.
  * @func: Method vtable for this resource. Immutable.
+ * @mob_node: Node for the MOB backup rbtree. Protected by @backup reserved.
  * @lru_head: List head for the LRU list. Protected by 
@dev_priv::resource_lock.
- * @mob_head: List head for the MOB backup list. Protected by @backup reserved.
  * @binding_head: List head for the context binding list. Protected by
  * the @dev_priv::binding_mutex
  * @res_free: The resource destructor.
@@ -182,8 +182,8 @@ struct vmw_resource {
unsigned long backup_offset;
unsigned long pin_count;
const struct vmw_res_func *func;
+   struct rb_node mob_node;
struct list_head lru_head;
-   struct list_head mob_head;
struct list_head binding_head;
struct vmw_resource_dirty *dirty;
void (*res_free) (struct vmw_resource *res);
@@ -737,7 +737,7 @@ void vmw_resource_dirty_update(struct vmw_resource *res, 
pgoff_t start,
  */
 static inline bool vmw_resource_mob_attached(const struct vmw_resource *res)
 {
-   return !list_empty(&res->mob_head);
+   return !RB_EMPTY_NODE(&res->mob_node);
 }
 
 /**
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
index b54d60bd845d..a18831e1d353 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
@@ -40,11 +40,24 @@
 void vmw_resource_mob_attach(struct vmw_resource *res)
 {
struct vmw_buffer_object *backup = res->backup;
+   struct rb_node **new = &backup->res_tree.rb_node, *parent = NULL;
 
dma_resv_assert_held(res->backup->base.base.resv);
res->used_prio = (res->res_dirty) ? res->func->dirty_prio :
res->func->prio;
-   list_add_tail(&res->mob_head, &backup->res_list);
+
+   while (*new) {
+   struct vmw_resource *this =
+   container_of(*new, struct vmw_resource, mob_node);
+
+   parent = *new;
+   new = (res->backup_offset < this->backup_offset) ?
+   &((*new)->rb_left) : &((*new)->rb_right);
+   }
+
+   rb_link_node(&res->mob_node, parent, new);
+   rb_insert_color(&res->mob_node, &backup->res_tree);
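
For reference, the matching lookup on this tree is the usual rbtree walk;
a minimal sketch, assuming nodes are keyed on backup_offset exactly as in
the insertion above (the function name is illustrative, not part of the
patch):

static struct vmw_resource *
vmw_resource_mob_lookup(struct vmw_buffer_object *backup,
			unsigned long backup_offset)
{
	struct rb_node *node = backup->res_tree.rb_node;

	while (node) {
		struct vmw_resource *this =
			container_of(node, struct vmw_resource, mob_node);

		if (backup_offset < this->backup_offset)
			node = node->rb_left;
		else if (backup_offset > this->backup_offset)
			node = node->rb_right;
		else
			return this;
	}

	return NULL;
}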

[PATCH v7 8/8] drm/vmwgfx: Add surface dirty-tracking callbacks

2019-11-07 Thread VMware
From: Thomas Hellstrom 

Add the callbacks necessary to implement emulated coherent memory for
surfaces. Add a flag to the gb_surface_create ioctl to indicate that
surface memory should be coherent.
Also bump the drm minor version to signal the availability of coherent
surfaces.

Cc: Andrew Morton 
Cc: Matthew Wilcox 
Cc: Will Deacon 
Cc: Peter Zijlstra 
Cc: Rik van Riel 
Cc: Minchan Kim 
Cc: Michal Hocko 
Cc: Huang Ying 
Cc: Jérôme Glisse 
Cc: Kirill A. Shutemov 
Signed-off-by: Thomas Hellstrom 
Reviewed-by: Deepak Rawat 
---
 .../device_include/svga3d_surfacedefs.h   | 233 ++-
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.h   |   4 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_surface.c   | 395 +-
 include/uapi/drm/vmwgfx_drm.h |   4 +-
 4 files changed, 629 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/vmwgfx/device_include/svga3d_surfacedefs.h 
b/drivers/gpu/drm/vmwgfx/device_include/svga3d_surfacedefs.h
index f2bfd3d80598..61414f105c67 100644
--- a/drivers/gpu/drm/vmwgfx/device_include/svga3d_surfacedefs.h
+++ b/drivers/gpu/drm/vmwgfx/device_include/svga3d_surfacedefs.h
@@ -1280,7 +1280,6 @@ svga3dsurface_get_pixel_offset(SVGA3dSurfaceFormat format,
return offset;
 }
 
-
 static inline u32
 svga3dsurface_get_image_offset(SVGA3dSurfaceFormat format,
   surf_size_struct baseLevelSize,
@@ -1375,4 +1374,236 @@ 
svga3dsurface_is_screen_target_format(SVGA3dSurfaceFormat format)
return svga3dsurface_is_dx_screen_target_format(format);
 }
 
+/**
+ * struct svga3dsurface_mip - Mipmap level information
+ * @bytes: Bytes required in the backing store of this mipmap level.
+ * @img_stride: Byte stride per image.
+ * @row_stride: Byte stride per block row.
+ * @size: The size of the mipmap.
+ */
+struct svga3dsurface_mip {
+   size_t bytes;
+   size_t img_stride;
+   size_t row_stride;
+   struct drm_vmw_size size;
+};
+
+/**
+ * struct svga3dsurface_cache - Cached surface information
+ * @desc: Pointer to the surface descriptor
+ * @mip: Array of mipmap level information. Valid size is @num_mip_levels.
+ * @mip_chain_bytes: Bytes required in the backing store for the whole chain
+ * of mip levels.
+ * @sheet_bytes: Bytes required in the backing store for a sheet
+ * representing a single sample.
+ * @num_mip_levels: Valid size of the @mip array. Number of mipmap levels in
+ * a chain.
+ * @num_layers: Number of slices in an array texture or number of faces in
+ * a cubemap texture.
+ */
+struct svga3dsurface_cache {
+   const struct svga3d_surface_desc *desc;
+   struct svga3dsurface_mip mip[DRM_VMW_MAX_MIP_LEVELS];
+   size_t mip_chain_bytes;
+   size_t sheet_bytes;
+   u32 num_mip_levels;
+   u32 num_layers;
+};
+
+/**
+ * struct svga3dsurface_loc - Surface location
+ * @sub_resource: Surface subresource. Defined as layer * num_mip_levels +
+ * mip_level.
+ * @x: X coordinate.
+ * @y: Y coordinate.
+ * @z: Z coordinate.
+ */
+struct svga3dsurface_loc {
+   u32 sub_resource;
+   u32 x, y, z;
+};
+
+/**
+ * svga3dsurface_subres - Compute the subresource from layer and mipmap.
+ * @cache: Surface layout data.
+ * @mip_level: The mipmap level.
+ * @layer: The surface layer (face or array slice).
+ *
+ * Return: The subresource.
+ */
+static inline u32 svga3dsurface_subres(const struct svga3dsurface_cache *cache,
+  u32 mip_level, u32 layer)
+{
+   return cache->num_mip_levels * layer + mip_level;
+}
+
+/**
+ * svga3dsurface_setup_cache - Build a surface cache entry
+ * @size: The surface base level dimensions.
+ * @format: The surface format.
+ * @num_mip_levels: Number of mipmap levels.
+ * @num_layers: Number of layers.
+ * @num_samples: Number of samples.
+ * @cache: Pointer to a struct svga3dsurface_cache object to be filled in.
+ *
+ * Return: Zero on success, -EINVAL on invalid surface layout.
+ */
+static inline int svga3dsurface_setup_cache(const struct drm_vmw_size *size,
+   SVGA3dSurfaceFormat format,
+   u32 num_mip_levels,
+   u32 num_layers,
+   u32 num_samples,
+   struct svga3dsurface_cache *cache)
+{
+   const struct svga3d_surface_desc *desc;
+   u32 i;
+
+   memset(cache, 0, sizeof(*cache));
+   cache->desc = desc = svga3dsurface_get_desc(format);
+   cache->num_mip_levels = num_mip_levels;
+   cache->num_layers = num_layers;
+   for (i = 0; i < cache->num_mip_levels; i++) {
+   struct svga3dsurface_mip *mip = &cache->mip[i];
+
+   mip->size = svga3dsurface_get_mip_size(*size, i);
+   mip->bytes = svga3dsurface_get_image_buffer_size
+   (desc, &mip->size, 0);
+   mip->row_stride =
+   __KERNEL_DIV_ROUND_UP(mip->size.width,
+ 

[PATCH v7 5/8] drm/vmwgfx: Implement an infrastructure for write-coherent resources

2019-11-07 Thread VMware
-   bool res_dirty;
-   bool backup_dirty;
+   u32 res_dirty : 1;
+   u32 backup_dirty : 1;
+   u32 coherent : 1;
struct vmw_buffer_object *backup;
unsigned long backup_offset;
unsigned long pin_count;
@@ -180,6 +185,7 @@ struct vmw_resource {
struct list_head lru_head;
struct list_head mob_head;
struct list_head binding_head;
+   struct vmw_resource_dirty *dirty;
void (*res_free) (struct vmw_resource *res);
void (*hw_destroy) (struct vmw_resource *res);
 };
@@ -720,6 +726,8 @@ extern void vmw_resource_evict_all(struct vmw_private 
*dev_priv);
 extern void vmw_resource_unbind_list(struct vmw_buffer_object *vbo);
 void vmw_resource_mob_attach(struct vmw_resource *res);
 void vmw_resource_mob_detach(struct vmw_resource *res);
+void vmw_resource_dirty_update(struct vmw_resource *res, pgoff_t start,
+  pgoff_t end);
 
 /**
  * vmw_resource_mob_attached - Whether a resource currently has a mob attached
@@ -1407,6 +1415,15 @@ int vmw_host_log(const char *log);
 #define VMW_DEBUG_USER(fmt, ...)  \
DRM_DEBUG_DRIVER(fmt, ##__VA_ARGS__)
 
+/* Resource dirtying - vmwgfx_page_dirty.c */
+void vmw_bo_dirty_scan(struct vmw_buffer_object *vbo);
+int vmw_bo_dirty_add(struct vmw_buffer_object *vbo);
+void vmw_bo_dirty_transfer_to_res(struct vmw_resource *res);
+void vmw_bo_dirty_clear_res(struct vmw_resource *res);
+void vmw_bo_dirty_release(struct vmw_buffer_object *vbo);
+vm_fault_t vmw_bo_vm_fault(struct vm_fault *vmf);
+vm_fault_t vmw_bo_vm_mkwrite(struct vm_fault *vmf);
+
 /**
  * VMW_DEBUG_KMS - Debug output for kernel mode-setting
  *
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
index ff86d49dc5e8..934ad7c0c342 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
@@ -2560,7 +2560,6 @@ static int vmw_cmd_dx_check_subresource(struct 
vmw_private *dev_priv,
 offsetof(typeof(*cmd), sid));
 
cmd = container_of(header, typeof(*cmd), header);
-
return vmw_cmd_res_check(dev_priv, sw_context, vmw_res_surface,
 VMW_RES_DIRTY_NONE, user_surface_converter,
 &cmd->sid, NULL);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c
new file mode 100644
index ..060c1e492f25
--- /dev/null
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c
@@ -0,0 +1,421 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/******
+ *
+ * Copyright 2019 VMware, Inc., Palo Alto, CA., USA
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **/
+#include "vmwgfx_drv.h"
+
+/*
+ * Different methods for tracking dirty:
+ * VMW_BO_DIRTY_PAGETABLE - Scan the pagetable for hardware dirty bits
+ * VMW_BO_DIRTY_MKWRITE - Write-protect page table entries and record write-
+ * accesses in the VM mkwrite() callback
+ */
+enum vmw_bo_dirty_method {
+   VMW_BO_DIRTY_PAGETABLE,
+   VMW_BO_DIRTY_MKWRITE,
+};
+
+/*
+ * No dirtied pages at scan trigger a transition to the _MKWRITE method,
+ * similarly a certain percentage of dirty pages trigger a transition to
+ * the _PAGETABLE method. How many triggers should we wait for before
+ * changing method?
+ */
+#define VMW_DIRTY_NUM_CHANGE_TRIGGERS 2
+
+/* Percentage to trigger a transition to the _PAGETABLE method */
+#define VMW_DIRTY_PERCENTAGE 10
+
+/**
+ * struct vmw_bo_dirty - Dirty information for buffer objects
+ * @start: First currently dirty bit
+ * @end: Last currently dirty bit + 1
+ * @method: The currently used dirty method
+ * @change_count: Number of consecutive method change triggers

[PATCH v7 0/8] Emulated coherent graphics memory take 2

2019-11-07 Thread VMware
From: Thomas Hellström 

Graphics APIs like OpenGL 4.4 and Vulkan require the graphics driver
to provide coherent graphics memory, meaning that the GPU sees any
content written to the coherent memory on the next GPU operation that
touches that memory, and the CPU sees any content written by the GPU
to that memory immediately after any fence object trailing the GPU
operation is signaled.

Paravirtual drivers that otherwise require explicit synchronization
need to do this by hooking up dirty tracking to pagefault handlers
and buffer object validation.

Provide mm helpers needed for this and that also allow for huge pmd-
and pud entries (patch 1-3), and the associated vmwgfx code (patch 4-7).

The code has been tested and exercised by a tailored version of mesa
where we disable all explicit synchronization and assume graphics memory
is coherent. The performance loss varies of course; a typical number is
around 5%.
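
To make the write-notification part concrete, the dirty tracking hooked up
in patches 4-7 boils down to something like the sketch below (illustrative
only: struct example_bo and its fields are made-up stand-ins for the
vmwgfx types, and locking is omitted):

struct example_bo_dirty {
	pgoff_t start, end;		/* Currently dirty range [start, end) */
	unsigned long bitmap[];		/* One bit per page */
};

struct example_bo {
	struct drm_vma_offset_node vma_node;
	pgoff_t num_pages;
	struct example_bo_dirty *dirty;
};

/* Record a write access to a write-protected shared mapping */
static vm_fault_t example_bo_vm_mkwrite(struct vm_fault *vmf)
{
	struct example_bo *bo = vmf->vma->vm_private_data;
	pgoff_t page_offset = vmf->pgoff - drm_vma_node_start(&bo->vma_node);
	struct example_bo_dirty *dirty = bo->dirty;

	if (page_offset >= bo->num_pages)
		return VM_FAULT_SIGBUS;

	/* Mark the page dirty; validation later flushes it to the device */
	if (!__test_and_set_bit(page_offset, &dirty->bitmap[0])) {
		dirty->start = min(dirty->start, page_offset);
		dirty->end = max(dirty->end, page_offset + 1);
	}

	/* Returning 0 lets the core make the PTE writable again */
	return 0;
}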

I would like to merge this code through the DRM tree, so an ack to include
the new mm helpers in that merge would be greatly appreciated.

Changes since RFC:
- Merge conflict changes moved to the correct patch. Fixes intra-patchset
  compile errors.
- Be more aggressive when turning ttm vm code into helpers. This makes sure
  we can use a const qualifier on the vmwgfx vm_ops.
- Reinstate a lost comment and fix an error path that was broken when turning
  the ttm vm code into helpers.
- Remove explicit type-casts of struct vm_area_struct::vm_private_data
- Clarify the locking inversion that makes us not being able to use the mm
  pagewalk code.

Changes since v1:
- Removed the vmwgfx maintainer entry for as_dirty_helpers.c, updated
  commit message accordingly
- Removed the TTM patches from the series as they are merged separately
  through DRM.
Changes since v2:
- Split out the pagewalk code from as_dirty_helpers.c and document locking.
- Add pre_vma and post_vma callbacks to the pagewalk code.
- Remove huge pmd and -pud asserts that would trip when we protect vmas with
  struct address_space::i_mmap_rwsem rather than with
  struct vm_area_struct::mmap_sem.
- Do some naming cleanup in as_dirty_helpers.c
Changes since v3:
- Extensive renaming of the dirty helpers including the filename.
- Update walk_page_mapping() doc.
- Update the pagewalk code to not unconditionally split pmds if a pte_entry()
  callback is present. Update the dirty helper pmd_entry accordingly.
- Use separate walk ops for the dirty helpers.
- Update the pagewalk code to take the pagetable lock in walk_pte_range.
Changes since v4:
- Fix pte pointer confusion in patch 2/8
- Skip the pagewalk code conditional split patch for now, and update the
  mapping_dirty_helper accordingly. That problem will be solved in a cleaner
  way in a follow-up patchset.
Changes since v5:
- Fix tlb flushing when we have other pending tlb flushes.
Changes since v6:
- Added Andrew Morton R-Bs and acks. Series is now approved to merge through
  DRM: 
https://lore.kernel.org/r/20191105195114.f75be5e76763da5546121...@linux-foundation.org/
- Removed a redundant assignment in patch #8 (Colin Ian King)

  
Cc: Andrew Morton 
Cc: Matthew Wilcox 
Cc: Will Deacon 
Cc: Peter Zijlstra 
Cc: Rik van Riel 
Cc: Minchan Kim 
Cc: Michal Hocko 
Cc: Huang Ying 
Cc: Jérôme Glisse 
Cc: Kirill A. Shutemov 

[PATCH] drm/vmwgfx: Use coherent memory if there are dma mapping size restrictions

2019-11-13 Thread VMware
From: Thomas Hellstrom 

We're gradually moving towards using DMA coherent memory in most
situations, although TTM interactions with the DMA layers are still a
work-in-progress. Meanwhile, use coherent memory when there are size
restrictions, meaning that there is a chance that streaming dma mapping
of large buffer objects may fail.
Also move DMA mask settings to the vmw_dma_select_mode function, since
it's important that we set the correct DMA masks before calling the
dma_max_mapping_size() function.

Cc: Christoph Hellwig 
Signed-off-by: Thomas Hellstrom 
Reviewed-by: Brian Paul 
---
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 31 +++--
 1 file changed, 7 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index fc0283659c41..1e1de83908fe 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -569,7 +569,10 @@ static int vmw_dma_select_mode(struct vmw_private 
*dev_priv)
[vmw_dma_map_populate] = "Caching DMA mappings.",
[vmw_dma_map_bind] = "Giving up DMA mappings early."};
 
-   if (vmw_force_coherent)
+   (void) dma_set_mask_and_coherent(dev_priv->dev->dev, DMA_BIT_MASK(64));
+
+   if (vmw_force_coherent ||
+   dma_max_mapping_size(dev_priv->dev->dev) != SIZE_MAX)
dev_priv->map_mode = vmw_dma_alloc_coherent;
else if (vmw_restrict_iommu)
dev_priv->map_mode = vmw_dma_map_bind;
@@ -582,30 +585,15 @@ static int vmw_dma_select_mode(struct vmw_private 
*dev_priv)
return -EINVAL;
 
DRM_INFO("DMA map mode: %s\n", names[dev_priv->map_mode]);
-   return 0;
-}
 
-/**
- * vmw_dma_masks - set required page- and dma masks
- *
- * @dev: Pointer to struct drm-device
- *
- * With 32-bit we can only handle 32 bit PFNs. Optionally set that
- * restriction also for 64-bit systems.
- */
-static int vmw_dma_masks(struct vmw_private *dev_priv)
-{
-   struct drm_device *dev = dev_priv->dev;
-   int ret = 0;
-
-   ret = dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(64));
if (dev_priv->map_mode != vmw_dma_phys &&
(sizeof(unsigned long) == 4 || vmw_restrict_dma_mask)) {
DRM_INFO("Restricting DMA addresses to 44 bits.\n");
-   return dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(44));
+   return dma_set_mask_and_coherent(dev_priv->dev->dev,
+DMA_BIT_MASK(44));
}
 
-   return ret;
+   return 0;
 }
 
 static int vmw_driver_load(struct drm_device *dev, unsigned long chipset)
@@ -674,7 +662,6 @@ static int vmw_driver_load(struct drm_device *dev, unsigned 
long chipset)
dev_priv->capabilities2 = vmw_read(dev_priv, SVGA_REG_CAP2);
}
 
-
ret = vmw_dma_select_mode(dev_priv);
if (unlikely(ret != 0)) {
DRM_INFO("Restricting capabilities due to IOMMU setup.\n");
@@ -746,10 +733,6 @@ static int vmw_driver_load(struct drm_device *dev, 
unsigned long chipset)
if (dev_priv->capabilities & SVGA_CAP_CAP2_REGISTER)
vmw_print_capabilities2(dev_priv->capabilities2);
 
-   ret = vmw_dma_masks(dev_priv);
-   if (unlikely(ret != 0))
-   goto out_err0;
-
dma_set_max_seg_size(dev->dev, min_t(unsigned int, U32_MAX & PAGE_MASK,
 SCATTERLIST_MAX_SEGMENT));
 
-- 
2.21.0


[PATCH] drm/vmwgfx: remove set but not used variable 'srf'

2019-11-13 Thread VMware
From: YueHaibing 

drivers/gpu/drm/vmwgfx/vmwgfx_surface.c:339:22:
 warning: variable srf set but not used [-Wunused-but-set-variable]

'srf' is never used, so can be removed.

Signed-off-by: YueHaibing 
Reviewed-by: Thomas Hellstrom 
---
 drivers/gpu/drm/vmwgfx/vmwgfx_surface.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c
index 29d8794f0421..de0530b4dc1b 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c
@@ -336,7 +336,6 @@ static void vmw_hw_surface_destroy(struct vmw_resource *res)
 {
 
struct vmw_private *dev_priv = res->dev_priv;
-   struct vmw_surface *srf;
void *cmd;
 
if (res->func->destroy == vmw_gb_surface_destroy) {
@@ -360,7 +359,6 @@ static void vmw_hw_surface_destroy(struct vmw_resource *res)
 */
 
mutex_lock(&dev_priv->cmdbuf_mutex);
-   srf = vmw_res_to_srf(res);
dev_priv->used_memory_size -= res->backup_size;
mutex_unlock(&dev_priv->cmdbuf_mutex);
}
-- 
2.21.0


[PATCH] drm/vmwgfx: Use dma-coherent memory for high-bandwidth port messaging

2019-11-13 Thread VMware
From: Thomas Hellstrom 

With AMD-SEV high-bandwidth port messaging runs into trouble since the
message content is encrypted using the vm-specific key, and the
hypervisor is unable to read it.

So use unencrypted dma-coherent bounce buffers for temporary message
storage space. Allocating that memory is expensive so a future
optimization might include a static unencrypted memory area for messages.

Signed-off-by: Thomas Hellstrom 
Reviewed-by: Brian Paul 
---
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.c |  4 +--
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.h |  4 +--
 drivers/gpu/drm/vmwgfx/vmwgfx_msg.c | 45 +
 3 files changed, 31 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index 81a95651643f..fc0283659c41 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -908,13 +908,13 @@ static int vmw_driver_load(struct drm_device *dev, 
unsigned long chipset)
 
snprintf(host_log, sizeof(host_log), "vmwgfx: %s-%s",
VMWGFX_REPO, VMWGFX_GIT_VERSION);
-   vmw_host_log(host_log);
+   vmw_host_log(dev->dev, host_log);
 
memset(host_log, 0, sizeof(host_log));
snprintf(host_log, sizeof(host_log), "vmwgfx: Module Version: %d.%d.%d",
VMWGFX_DRIVER_MAJOR, VMWGFX_DRIVER_MINOR,
VMWGFX_DRIVER_PATCHLEVEL);
-   vmw_host_log(host_log);
+   vmw_host_log(dev->dev, host_log);
 
if (dev_priv->enable_fb) {
vmw_fifo_resource_inc(dev_priv);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h 
b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index b18842f73081..a77bf72cb9ac 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -1389,9 +1389,9 @@ int vmw_bo_cpu_blit(struct ttm_buffer_object *dst,
struct vmw_diff_cpy *diff);
 
 /* Host messaging -vmwgfx_msg.c: */
-int vmw_host_get_guestinfo(const char *guest_info_param,
+int vmw_host_get_guestinfo(struct device *dev, const char *guest_info_param,
   char *buffer, size_t *length);
-int vmw_host_log(const char *log);
+int vmw_host_log(struct device *dev, const char *log);
 
 /* VMW logging */
 
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c
index b6c5e4c2ac3c..f439b7afa3a5 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c
@@ -304,8 +304,8 @@ STACK_FRAME_NON_STANDARD(vmw_send_msg);
  * @msg:  [OUT] message received from the host
  * @msg_len: message length
  */
-static int vmw_recv_msg(struct rpc_channel *channel, void **msg,
-   size_t *msg_len)
+static int vmw_recv_msg(struct device *dev, struct rpc_channel *channel,
+   void **msg, size_t *msg_len, dma_addr_t *dma_handle)
 {
unsigned long eax, ebx, ecx, edx, si, di;
char *reply;
@@ -339,7 +339,8 @@ static int vmw_recv_msg(struct rpc_channel *channel, void 
**msg,
return 0;
 
reply_len = ebx;
-   reply = kzalloc(reply_len + 1, GFP_KERNEL);
+   reply = dma_alloc_coherent(dev, reply_len + 1, dma_handle,
+  GFP_KERNEL);
if (!reply) {
DRM_ERROR("Cannot allocate memory for host message 
reply.\n");
return -ENOMEM;
@@ -350,7 +351,7 @@ static int vmw_recv_msg(struct rpc_channel *channel, void 
**msg,
ebx = vmw_port_hb_in(channel, reply, reply_len,
 !!(HIGH_WORD(ecx) & MESSAGE_STATUS_HB));
if ((HIGH_WORD(ebx) & MESSAGE_STATUS_SUCCESS) == 0) {
-   kfree(reply);
+   dma_free_coherent(dev, reply_len + 1, reply, 
*dma_handle);
reply = NULL;
if ((HIGH_WORD(ebx) & MESSAGE_STATUS_CPT) != 0) {
/* A checkpoint occurred. Retry. */
@@ -374,7 +375,7 @@ static int vmw_recv_msg(struct rpc_channel *channel, void 
**msg,
eax, ebx, ecx, edx, si, di);
 
if ((HIGH_WORD(ecx) & MESSAGE_STATUS_SUCCESS) == 0) {
-   kfree(reply);
+   dma_free_coherent(dev, reply_len + 1, reply, 
*dma_handle);
reply = NULL;
if ((HIGH_WORD(ecx) & MESSAGE_STATUS_CPT) != 0) {
/* A checkpoint occurred. Retry. */
@@ -404,18 +405,21 @@ STACK_FRAME_NON_STANDARD(vmw_recv_msg);
  * Gets the value of a  GuestInfo.* parameter.  The value returned will be in
  * a string, and it is up to the caller to post-process.
  *
+ * @dev: Pointer to struct device used for coherent memory allocation
  * @guest_info_param:  Parameter to get, e.g. GuestInfo.svga.gl3
  * @buffer: if NULL, *reply_len will contain reply size.
+ * @length: size of the reply_buf.  Set to size of reply upon return

[git pull] vmwgfx-coherent

2019-11-13 Thread VMware
Dave, Daniel,

Take 2 of the coherent memory patches are now ready for pulling.
Following the discussion we had after the last merge attempt that had to
be reverted, it might make sense to send this to Linus as a pull request
separate from other DRM stuff.

This time Linus has been heavily involved in the outcome of the -mm patches,
and Andrew Morton has acked them for merging through DRM:

https://lore.kernel.org/r/20191105195114.f75be5e76763da5546121...@linux-foundation.org/

If you think it's too late for the 5.5 merge window, deferring to 5.6 is
not a problem.

There is a dependency on drm-misc:
This needs to be merged after drm-misc caa478af4812, to avoid a compilation
error.

Cover message:
Graphics APIs like OpenGL 4.4 and Vulkan require the graphics driver
to provide coherent graphics memory, meaning that the GPU sees any
content written to the coherent memory on the next GPU operation that
touches that memory, and the CPU sees any content written by the GPU
to that memory immediately after any fence object trailing the GPU
operation is signaled.

Paravirtual drivers that otherwise require explicit synchronization
need to do this by hooking up dirty tracking to pagefault handlers
and buffer object validation.

Provide mm helpers needed for this and that also allow for huge pmd-
and pud entries (patch 1-3), and the associated vmwgfx code (patch 4-7).

The code has been tested and exercised by a tailored version of mesa
where we disable all explicit synchronization and assume graphics memory
is coherent. The performance loss varies of course; a typical number is
around 5%.

The following changes since commit 7aef29f4d4613570f413301b0807ea66a21f5e3b:

  drm/ttm: Convert vm callbacks to helpers (2019-11-06 13:02:00 +0100)

are available in the Git repository at:

  git://people.freedesktop.org/~thomash/linux vmwgfx-coherent

for you to fetch changes up to 9ca7d19ff8ba6207bccab46536814fe4839df80a:

  drm/vmwgfx: Add surface dirty-tracking callbacks (2019-11-06 15:45:32 +0100)


Thomas Hellstrom (8):
  mm: Remove BUG_ON mmap_sem not held from xxx_trans_huge_lock()
  mm: pagewalk: Take the pagetable lock in walk_pte_range()
  mm: Add a walk_page_mapping() function to the pagewalk code
  mm: Add write-protect and clean utilities for address space ranges
  drm/vmwgfx: Implement an infrastructure for write-coherent resources
  drm/vmwgfx: Use an RBtree instead of linked list for MOB resources
  drm/vmwgfx: Implement an infrastructure for read-coherent resources
  drm/vmwgfx: Add surface dirty-tracking callbacks

 drivers/gpu/drm/vmwgfx/Kconfig |   1 +
 drivers/gpu/drm/vmwgfx/Makefile|   2 +-
 .../drm/vmwgfx/device_include/svga3d_surfacedefs.h | 233 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_bo.c |  10 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.h|  44 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c|   1 -
 drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c | 488 +
 drivers/gpu/drm/vmwgfx/vmwgfx_resource.c   | 193 +++-
 drivers/gpu/drm/vmwgfx/vmwgfx_resource_priv.h  |  13 +
 drivers/gpu/drm/vmwgfx/vmwgfx_surface.c| 395 -
 drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c   |  15 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_validation.c |  74 +++-
 drivers/gpu/drm/vmwgfx/vmwgfx_validation.h |  16 +-
 include/linux/huge_mm.h|   2 -
 include/linux/mm.h |  13 +-
 include/linux/pagewalk.h   |   9 +
 include/uapi/drm/vmwgfx_drm.h  |   4 +-
 mm/Kconfig |   3 +
 mm/Makefile|   1 +
 mm/mapping_dirty_helpers.c | 315 +
 mm/pagewalk.c  |  99 -
 21 files changed, 1875 insertions(+), 56 deletions(-)
 create mode 100644 drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c
 create mode 100644 mm/mapping_dirty_helpers.c

Re: [PATCH] drm/vmwgfx: Use coherent memory if there are dma mapping size restrictions

2019-11-13 Thread VMware

Hi,

On 11/13/19 3:16 PM, Christoph Hellwig wrote:

On Wed, Nov 13, 2019 at 10:51:43AM +0100, Thomas Hellström (VMware) wrote:

From: Thomas Hellstrom 

We're gradually moving towards using DMA coherent memory in most
situations, although TTM interactions with the DMA layers are still a
work-in-progress. Meanwhile, use coherent memory when there are size
restrictions, meaning that there is a chance that streaming dma mapping
of large buffer objects may fail.
Also move DMA mask settings to the vmw_dma_select_mode function, since
it's important that we set the correct DMA masks before calling the
dma_max_mapping_size() function.

Cc: Christoph Hellwig 
Signed-off-by: Thomas Hellstrom 
Reviewed-by: Brian Paul 
---
  drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 31 +++--
  1 file changed, 7 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index fc0283659c41..1e1de83908fe 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -569,7 +569,10 @@ static int vmw_dma_select_mode(struct vmw_private 
*dev_priv)
[vmw_dma_map_populate] = "Caching DMA mappings.",
[vmw_dma_map_bind] = "Giving up DMA mappings early."};
  
-	if (vmw_force_coherent)

+   (void) dma_set_mask_and_coherent(dev_priv->dev->dev, DMA_BIT_MASK(64));

Please don't use void cast on returns.  Also this generally can't
fail, so if it fails anyway it is fatal, and you should actually
return an error.


OK. Will fix.




+   if (vmw_force_coherent ||
+   dma_max_mapping_size(dev_priv->dev->dev) != SIZE_MAX)

I don't think this is the right check to see if swiotlb would be
used.  You probably want to call dma_addressing_limited().  Please
also add a comment explaining the call here.


If I understand things correctly, dma_addressing_limited() would always 
return false on vmwgfx, at least if the "restrict to 44 bit" option is 
removed. The "odd" modes we want to catch are someone setting 
swiotlb=force, or someone enabling SEV. In both cases, vmwgfx would 
break down due to limited DMA space even if streaming DMA was fixed with 
appropriate sync calls.


The same holds for vmw_pvscsi (which we discussed before) where the 
large queue depth will typically fill the SWIOTLB causing excessive 
non-fatal error logging.
dma_max_mapping_size() currently catches these modes, but I guess the
best option would be a dma_swiotlb_forced() function that could be used
in cases like this?
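
(For illustration, such a hypothetical helper could look like the sketch
below; dma_swiotlb_forced() does not exist in the DMA API, and the exact
conditions are an assumption based on the swiotlb=force and SEV cases
mentioned above:)

/* Hypothetical helper, NOT an existing DMA-API function */
static inline bool dma_swiotlb_forced(struct device *dev)
{
	/*
	 * True when streaming DMA will always bounce, e.g. with
	 * swiotlb=force on the kernel command line or in an
	 * SEV-encrypted guest.
	 */
	return swiotlb_force == SWIOTLB_FORCE || sev_active();
}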






if (dev_priv->map_mode != vmw_dma_phys &&

AFAIK vmw_dma_phys can't even be set in current mainline and is dead
code.  Can you check if I'm missing something?  Certainly all three
branches above don't set it.


I'll remove that dead code for v2.




(sizeof(unsigned long) == 4 || vmw_restrict_dma_mask)) {
DRM_INFO("Restricting DMA addresses to 44 bits.\n");
-   return dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(44));
+   return dma_set_mask_and_coherent(dev_priv->dev->dev,
+DMA_BIT_MASK(44));

Can you keep setting the DMA mask together?


E.g. make the whole thing something like:

static int vmw_dma_select_mode(struct vmw_private *dev_priv)
{
if (dma_addressing_limited(dev_priv->dev->dev) || vmw_force_coherent) {
/*
 * XXX: what about AMD iommu?  virtio-iommu?  Also
 * swiotlb should be always available where we can
 * address more than 32-bit of memory..
 */
if (!IS_ENABLED(CONFIG_SWIOTLB) &&
!IS_ENABLED(CONFIG_INTEL_IOMMU))
return -EINVAL;


The above check is only to see if coherent memory functionality is 
really compiled into TTM. We have a patchset that got lost in the 
last merge window to fix this properly and avoid confusion about this. 
I'll rebase on that.



dev_priv->map_mode = vmw_dma_alloc_coherent;
} else if (vmw_restrict_iommu) {
dev_priv->map_mode = vmw_dma_map_bind;
} else {
dev_priv->map_mode = vmw_dma_map_populate;
}

/*
 * On 32-bit systems we can only handle 32 bit PFNs. Optionally set
 * that restriction also for 64-bit systems.
 */
return dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(
(IS_ENABLED(CONFIG_64BIT) && !vmw_restrict_dma_mask) ?
64 : 44));
}


Thanks,

Thomas


Re: drm/vmwgfx: Use dma-coherent memory for high-bandwidth port messaging

2019-11-13 Thread VMware

Hi, Nathan,

On 11/13/19 9:01 PM, Nathan Chancellor wrote:

On Wed, Nov 13, 2019 at 10:51:42AM +0100, Thomas Hellström (VMware) wrote:

From: Thomas Hellstrom 

With AMD-SEV high-bandwidth port messaging runs into trouble since the
message content is encrypted using the vm-specific key, and the
hypervisor is unable to read it.

So use unencrypted dma-coherent bounce buffers for temporary message
storage space. Allocating that memory is expensive so a future
optimization might include a static unencrypted memory area for messages.

Signed-off-by: Thomas Hellstrom 
Reviewed-by: Brian Paul 

Hi Thomas,

The 0day team has been doing clang builds for us and sending the results
to our mailing list for triage; this patch causes the following warning.
Seems legitimate, mind taking a look at it and resolving it how you see
fit?

Cheers,
Nathan


This should be harmless as dma_free_coherent() with reply == NULL is a 
nop, but anyway I'll respin to silence the warning.


Thanks,

Thomas



On Thu, Nov 14, 2019 at 03:36:44AM +0800, kbuild test robot wrote:

CC: kbuild-...@lists.01.org
In-Reply-To: <20191113095144.2981-1-thomas...@shipmail.org>
References: <20191113095144.2981-1-thomas...@shipmail.org>
TO: "Thomas Hellström (VMware)" 
CC: dri-devel@lists.freedesktop.org, Thomas Hellstrom , Brian Paul 
, Thomas Hellstrom , Brian Paul 

CC: Thomas Hellstrom , Brian Paul 

Hi "Thomas,

I love your patch! Perhaps something to improve:

[auto build test WARNING on linus/master]
[also build test WARNING on v5.4-rc7 next-20191113]
[if your patch is applied to the wrong git tree, please drop us a note to help
improve the system. BTW, we also suggest to use '--base' option to specify the
base tree in git format-patch, please see https://stackoverflow.com/a/37406982]

url:
https://github.com/0day-ci/linux/commits/Thomas-Hellstr-m-VMware/drm-vmwgfx-Use-dma-coherent-memory-for-high-bandwidth-port-messaging/20191114-020818
base:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 
0e3f1ad80fc8cb0c517fd9a9afb22752b741fa76
config: x86_64-rhel-7.6 (attached as .config)
compiler: clang version 10.0.0 (git://gitmirror/llvm_project 
335ac2eb662ce5f1888e2a50310b01fba2d40d68)
reproduce:
 # save the attached .config to linux build tree
 make ARCH=x86_64

If you fix the issue, kindly add following tag
Reported-by: kbuild test robot 

All warnings (new ones prefixed by >>):


drivers/gpu/drm/vmwgfx/vmwgfx_msg.c:441:6: warning: variable 'reply_handle' is 
used uninitialized whenever '||' condition is true [-Wsometimes-uninitialized]

if (vmw_send_msg(&channel, msg) ||
^~~
drivers/gpu/drm/vmwgfx/vmwgfx_msg.c:467:47: note: uninitialized use occurs 
here
dma_free_coherent(dev, reply_len + 1, reply, reply_handle);
 ^~~~
drivers/gpu/drm/vmwgfx/vmwgfx_msg.c:441:6: note: remove the '||' if its 
condition is always false
if (vmw_send_msg(&channel, msg) ||
^~
drivers/gpu/drm/vmwgfx/vmwgfx_msg.c:421:37: note: initialize the variable 
'reply_handle' to silence this warning
dma_addr_t req_handle, reply_handle;
   ^
= 0
1 warning generated.

vim +441 drivers/gpu/drm/vmwgfx/vmwgfx_msg.c

89da76fde68de1 Sinclair Yeh  2016-04-27  400
89da76fde68de1 Sinclair Yeh  2016-04-27  401
89da76fde68de1 Sinclair Yeh  2016-04-27  402  /**
89da76fde68de1 Sinclair Yeh  2016-04-27  403   * vmw_host_get_guestinfo: 
Gets a GuestInfo parameter
89da76fde68de1 Sinclair Yeh  2016-04-27  404   *
89da76fde68de1 Sinclair Yeh  2016-04-27  405   * Gets the value of a  
GuestInfo.* parameter.  The value returned will be in
89da76fde68de1 Sinclair Yeh  2016-04-27  406   * a string, and it is up to 
the caller to post-process.
89da76fde68de1 Sinclair Yeh  2016-04-27  407   *
6bdb21230a2a01 Thomas Hellstrom  2019-11-13  408   * @dev: Pointer to struct 
device used for coherent memory allocation
89da76fde68de1 Sinclair Yeh  2016-04-27  409   * @guest_info_param:  
Parameter to get, e.g. GuestInfo.svga.gl3
89da76fde68de1 Sinclair Yeh  2016-04-27  410   * @buffer: if NULL, 
*reply_len will contain reply size.
89da76fde68de1 Sinclair Yeh  2016-04-27  411   * @length: size of the 
reply_buf.  Set to size of reply upon return
89da76fde68de1 Sinclair Yeh  2016-04-27  412   *
89da76fde68de1 Sinclair Yeh  2016-04-27  413   * Returns: 0 on success
89da76fde68de1 Sinclair Yeh  2016-04-27  414   */
6bdb21230a2a01 Thomas Hellstrom  2019-11-13  415  int 
vmw_host_get_guestinfo(struct device *dev, const char *guest_info_param,
89da76fde68de1 Sinclair Yeh  2016-04-27  416   char 
*buffer,

[PATCH 3/3] drm/vmwgfx: Use coherent memory if there are dma mapping size restrictions

2019-11-14 Thread VMware
From: Thomas Hellstrom 

We're gradually moving towards using DMA coherent memory in most
situations, although TTM interactions with the DMA layers are still a
work-in-progress. Meanwhile, use coherent memory when there are size
restrictions, meaning that there is a chance that streaming dma mapping
of large buffer objects may fail.

Cc: Christoph Hellwig 
Signed-off-by: Thomas Hellstrom 
Reviewed-by: Brian Paul 
---
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 20 +++-
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index 8d479a411cdd..24f8d88d4b28 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -565,7 +565,15 @@ static int vmw_dma_select_mode(struct vmw_private 
*dev_priv)
[vmw_dma_map_populate] = "Caching DMA mappings.",
[vmw_dma_map_bind] = "Giving up DMA mappings early."};
 
-   if (vmw_force_coherent)
+   /*
+* dma_max_mapping_size() != SIZE_MAX means something is going
+* on in the dma layer that the dma_map_bind or dma_map_populate modes
+* are not working well with, or haven't been tested with.
+* This typically happens when the SWIOTLB is active. Fall back to
+* coherent memory in those cases.
+*/
+   if (dma_max_mapping_size(dev_priv->dev->dev) != SIZE_MAX ||
+   vmw_force_coherent)
dev_priv->map_mode = vmw_dma_alloc_coherent;
else if (vmw_restrict_iommu)
dev_priv->map_mode = vmw_dma_map_bind;
@@ -668,10 +676,8 @@ static int vmw_driver_load(struct drm_device *dev, 
unsigned long chipset)
dev_priv->capabilities2 = vmw_read(dev_priv, SVGA_REG_CAP2);
}
 
-
-   ret = vmw_dma_select_mode(dev_priv);
-   if (unlikely(ret != 0)) {
-   DRM_INFO("Restricting capabilities due to IOMMU setup.\n");
+   if (vmw_dma_masks(dev_priv) || vmw_dma_select_mode(dev_priv)) {
+   DRM_WARN("Refusing DMA due to lack of DMA support.");
refuse_dma = true;
}
 
@@ -740,10 +746,6 @@ static int vmw_driver_load(struct drm_device *dev, 
unsigned long chipset)
if (dev_priv->capabilities & SVGA_CAP_CAP2_REGISTER)
vmw_print_capabilities2(dev_priv->capabilities2);
 
-   ret = vmw_dma_masks(dev_priv);
-   if (unlikely(ret != 0))
-   goto out_err0;
-
dma_set_max_seg_size(dev->dev, min_t(unsigned int, U32_MAX & PAGE_MASK,
 SCATTERLIST_MAX_SEGMENT));
 
-- 
2.21.0


[PATCH 1/3] drm/vmwgfx: Remove the vmw_dma_phys mode code

2019-11-14 Thread VMware
From: Thomas Hellstrom 

This mode is not used anymore. Remove the dead code.

Signed-off-by: Thomas Hellstrom 
---
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.c| 4 +---
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.h| 1 -
 drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c | 9 -
 3 files changed, 1 insertion(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index e962048f65d2..60ef03120917 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -564,7 +564,6 @@ static void vmw_get_initial_size(struct vmw_private 
*dev_priv)
 static int vmw_dma_select_mode(struct vmw_private *dev_priv)
 {
static const char *names[vmw_dma_map_max] = {
-   [vmw_dma_phys] = "Using physical TTM page addresses.",
[vmw_dma_alloc_coherent] = "Using coherent TTM pages.",
[vmw_dma_map_populate] = "Caching DMA mappings.",
[vmw_dma_map_bind] = "Giving up DMA mappings early."};
@@ -598,8 +597,7 @@ static int vmw_dma_masks(struct vmw_private *dev_priv)
int ret = 0;
 
ret = dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(64));
-   if (dev_priv->map_mode != vmw_dma_phys &&
-   (sizeof(unsigned long) == 4 || vmw_restrict_dma_mask)) {
+   if (sizeof(unsigned long) == 4 || vmw_restrict_dma_mask) {
DRM_INFO("Restricting DMA addresses to 44 bits.\n");
return dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(44));
}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h 
b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index b18842f73081..761dbd424749 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -281,7 +281,6 @@ struct vmw_res_cache_entry {
  * enum vmw_dma_map_mode - indicate how to perform TTM page dma mappings.
  */
 enum vmw_dma_map_mode {
-   vmw_dma_phys,   /* Use physical page addresses */
vmw_dma_alloc_coherent, /* Use TTM coherent pages */
vmw_dma_map_populate,   /* Unmap from DMA just after unpopulate */
vmw_dma_map_bind,   /* Unmap from DMA just before unbind */
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
index d8ea3dd10af0..8c0135f1c346 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
@@ -295,11 +295,6 @@ static struct page *__vmw_piter_non_sg_page(struct 
vmw_piter *viter)
  * pointed to by @viter. Functions are selected depending on the
  * current mapping mode.
  */
-static dma_addr_t __vmw_piter_phys_addr(struct vmw_piter *viter)
-{
-   return page_to_phys(viter->pages[viter->i]);
-}
-
 static dma_addr_t __vmw_piter_dma_addr(struct vmw_piter *viter)
 {
return viter->addrs[viter->i];
@@ -329,10 +324,6 @@ void vmw_piter_start(struct vmw_piter *viter, const struct 
vmw_sg_table *vsgt,
viter->page = &__vmw_piter_non_sg_page;
viter->pages = vsgt->pages;
switch (vsgt->mode) {
-   case vmw_dma_phys:
-   viter->next = &__vmw_piter_non_sg_next;
-   viter->dma_address = &__vmw_piter_phys_addr;
-   break;
case vmw_dma_alloc_coherent:
viter->next = &__vmw_piter_non_sg_next;
viter->dma_address = &__vmw_piter_dma_addr;
-- 
2.21.0


[PATCH 2/3] drm/vmwgfx: Remove the restrict_dma_mask module parameter

2019-11-14 Thread VMware
From: Thomas Hellstrom 

The restrict_dma_mask was mainly used for iommu functionality debugging
and has no use anymore. Remove it.

Signed-off-by: Thomas Hellstrom 
---
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 5 +
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index 60ef03120917..8d479a411cdd 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -255,7 +255,6 @@ static int enable_fbdev = 
IS_ENABLED(CONFIG_DRM_VMWGFX_FBCON);
 static int vmw_force_iommu;
 static int vmw_restrict_iommu;
 static int vmw_force_coherent;
-static int vmw_restrict_dma_mask;
 static int vmw_assume_16bpp;
 
 static int vmw_probe(struct pci_dev *, const struct pci_device_id *);
@@ -270,8 +269,6 @@ MODULE_PARM_DESC(restrict_iommu, "Try to limit IOMMU usage 
for TTM pages");
 module_param_named(restrict_iommu, vmw_restrict_iommu, int, 0600);
 MODULE_PARM_DESC(force_coherent, "Force coherent TTM pages");
 module_param_named(force_coherent, vmw_force_coherent, int, 0600);
-MODULE_PARM_DESC(restrict_dma_mask, "Restrict DMA mask to 44 bits with IOMMU");
-module_param_named(restrict_dma_mask, vmw_restrict_dma_mask, int, 0600);
 MODULE_PARM_DESC(assume_16bpp, "Assume 16-bpp when filtering modes");
 module_param_named(assume_16bpp, vmw_assume_16bpp, int, 0600);
 
@@ -597,7 +594,7 @@ static int vmw_dma_masks(struct vmw_private *dev_priv)
int ret = 0;
 
ret = dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(64));
-   if (sizeof(unsigned long) == 4 || vmw_restrict_dma_mask) {
+   if (sizeof(unsigned long) == 4) {
DRM_INFO("Restricting DMA addresses to 44 bits.\n");
return dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(44));
}
-- 
2.21.0


[PATCH 0/3] drm/vmwgfx: Clean- and fix up DMA mode selection

2019-11-14 Thread VMware
From: Thomas Hellstrom (VMware) 

Clean up and fix dma mode selection. We remove an unused mode, a largely
unused module option and finally add a check to enable dma coherent memory
in those cases where streaming dma mappings are undesired or won't work.

Note that this series is intended to be applied on top of
git://people.freedesktop.org/~thomash/linux branch vmwgfx-next

meaning that the vmw_dma_select_mode function would, after applying the patches,
look like

static int vmw_dma_select_mode(struct vmw_private *dev_priv)
{
static const char *names[vmw_dma_map_max] = {
[vmw_dma_alloc_coherent] = "Using coherent TTM pages.",
[vmw_dma_map_populate] = "Caching DMA mappings.",
[vmw_dma_map_bind] = "Giving up DMA mappings early."};

/*
 * dma_max_mapping_size() != SIZE_MAX means something is going
 * on in the dma layer that the dma_map_bind or dma_map_populate modes
 * are not working well with, or haven't been tested with.
 * This typically happens when the SWIOTLB is active. Fall back to
 * coherent memory in those cases.
 */
if (dma_max_mapping_size(dev_priv->dev->dev) != SIZE_MAX ||
vmw_force_coherent)
dev_priv->map_mode = vmw_dma_alloc_coherent;
else if (vmw_restrict_iommu)
dev_priv->map_mode = vmw_dma_map_bind;
else
dev_priv->map_mode = vmw_dma_map_populate;

if (!IS_ENABLED(CONFIG_DRM_TTM_DMA_PAGE_POOL) &&
(dev_priv->map_mode == vmw_dma_alloc_coherent))
return -EINVAL;

DRM_INFO("DMA map mode: %s\n", names[dev_priv->map_mode]);
return 0;
}

Cc: Christoph Hellwig 

Re: [PATCH 3/3] drm/vmwgfx: Use coherent memory if there are dma mapping size restrictions

2019-11-14 Thread VMware

On 11/14/19 1:40 PM, Christoph Hellwig wrote:

On Thu, Nov 14, 2019 at 11:56:45AM +0100, Thomas Hellström (VMware) wrote:

From: Thomas Hellstrom 

We're gradually moving towards using DMA coherent memory in most
situations, although TTM interactions with the DMA layers are still a
work-in-progress. Meanwhile, use coherent memory when there are size
restrictions, meaning that there is a chance that streaming dma mapping
of large buffer objects may fail.

Unfortunately that dma mapping size check really is completely
broken.  For example the sparc32 iommus have mapping size limitations
(which we just haven't wired up yet), but will never bounce buffer.

Let me cook up a real API for you instead.  dma_addressing_limited()
is fundamentally the right call for this, we just need to make it
handle the corner cases you mentioned in reply to the last version of
your patch.


Sounds great!

Thanks,

Thomas



[git pull] vmwgfx-next

2019-11-14 Thread VMware
Dave, Daniel

Two minor cleanups / fixes for -next.

The following changes since commit 3ca3a9eab7085b3c938b5d088c3020269cfecdc8:

  Merge tag 'drm-misc-next-fixes-2019-11-06' of 
git://anongit.freedesktop.org/drm/drm-misc into drm-next (2019-11-08 16:48:22 
+1000)

are available in the Git repository at:

  git://people.freedesktop.org/~thomash/linux vmwgfx-next

for you to fetch changes up to b4011644b03c04b1a42068313c5b894da41d0698:

  drm/vmwgfx: remove set but not used variable 'srf' (2019-11-14 08:41:36 +0100)


Thomas Hellstrom (1):
  drm/ttm, drm/vmwgfx: Use a configuration option for the TTM dma page pool

YueHaibing (1):
  drm/vmwgfx: remove set but not used variable 'srf'

 drivers/gpu/drm/Kconfig  | 7 +++
 drivers/gpu/drm/ttm/Makefile | 4 ++--
 drivers/gpu/drm/ttm/ttm_page_alloc_dma.c | 3 ---
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.c  | 3 +--
 drivers/gpu/drm/vmwgfx/vmwgfx_surface.c  | 2 --
 include/drm/ttm/ttm_page_alloc.h | 2 +-
 6 files changed, 11 insertions(+), 10 deletions(-)

[PATCH v4 2/2] mm, drm/ttm: Fix vm page protection handling

2019-12-12 Thread VMware
From: Thomas Hellstrom 

TTM graphics buffer objects may, transparently to user-space, move
between IO and system memory. When that happens, all PTEs pointing to the
old location are zapped before the move and then faulted in again if
needed. At fault time, the page protection caching mode- and
encryption bits may change and be different from those of
struct vm_area_struct::vm_page_prot.

We were using an ugly hack to set the page protection correctly.
Fix that and instead export and use vmf_insert_mixed_prot() or use
vmf_insert_pfn_prot().
Also get the default page protection from
struct vm_area_struct::vm_page_prot rather than using vm_get_page_prot().
This way we catch modifications done by the vm system for drivers that
want write-notification.

Cc: Andrew Morton 
Cc: Michal Hocko 
Cc: "Matthew Wilcox (Oracle)" 
Cc: "Kirill A. Shutemov" 
Cc: Ralph Campbell 
Cc: "Jérôme Glisse" 
Cc: "Christian König" 
Signed-off-by: Thomas Hellstrom 
Reviewed-by: Christian König 
---
 drivers/gpu/drm/ttm/ttm_bo_vm.c | 22 +++---
 mm/memory.c |  1 +
 2 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
index 3e8c3de91ae4..78bfab81cf04 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -173,7 +173,6 @@ vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf,
pgoff_t num_prefault)
 {
struct vm_area_struct *vma = vmf->vma;
-   struct vm_area_struct cvma = *vma;
struct ttm_buffer_object *bo = vma->vm_private_data;
struct ttm_bo_device *bdev = bo->bdev;
unsigned long page_offset;
@@ -244,7 +243,7 @@ vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf,
goto out_io_unlock;
}
 
-   cvma.vm_page_prot = ttm_io_prot(bo->mem.placement, prot);
+   prot = ttm_io_prot(bo->mem.placement, prot);
if (!bo->mem.bus.is_iomem) {
struct ttm_operation_ctx ctx = {
.interruptible = false,
@@ -260,7 +259,7 @@ vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf,
}
} else {
/* Iomem should not be marked encrypted */
-   cvma.vm_page_prot = pgprot_decrypted(cvma.vm_page_prot);
+   prot = pgprot_decrypted(prot);
}
 
/*
@@ -283,11 +282,20 @@ vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf,
pfn = page_to_pfn(page);
}
 
+   /*
+* Note that the value of @prot at this point may differ from
+* the value of @vma->vm_page_prot in the caching- and
+* encryption bits. This is because the exact location of the
+* data may not be known at mmap() time and may also change
+* at arbitrary times while the data is mmap'ed.
+* See vmf_insert_mixed_prot() for a discussion.
+*/
if (vma->vm_flags & VM_MIXEDMAP)
-   ret = vmf_insert_mixed(&cvma, address,
-   __pfn_to_pfn_t(pfn, PFN_DEV));
+   ret = vmf_insert_mixed_prot(vma, address,
+   __pfn_to_pfn_t(pfn, 
PFN_DEV),
+   prot);
else
-   ret = vmf_insert_pfn(&cvma, address, pfn);
+   ret = vmf_insert_pfn_prot(vma, address, pfn, prot);
 
/* Never error on prefaulted PTEs */
if (unlikely((ret & VM_FAULT_ERROR))) {
@@ -319,7 +327,7 @@ vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
if (ret)
return ret;
 
-   prot = vm_get_page_prot(vma->vm_flags);
+   prot = vma->vm_page_prot;
ret = ttm_bo_vm_fault_reserved(vmf, prot, TTM_BO_VM_NUM_PREFAULT);
if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
return ret;
diff --git a/mm/memory.c b/mm/memory.c
index 269a8a871e83..0f262acd1018 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1798,6 +1798,7 @@ vm_fault_t vmf_insert_mixed_prot(struct vm_area_struct 
*vma, unsigned long addr,
 {
return __vm_insert_mixed(vma, addr, pfn, pgprot, false);
 }
+EXPORT_SYMBOL(vmf_insert_mixed_prot);
 
 vm_fault_t vmf_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
pfn_t pfn)
-- 
2.21.0



[PATCH v4 0/2] mm, drm/ttm: Fix pte insertion with customized protection

2019-12-12 Thread VMware
From: Thomas Hellstrom 

The drm/ttm module is using a modified on-stack copy of the
struct vm_area_struct to be able to set a page protection with customized
caching. Fix that by adding a vmf_insert_mixed_prot() function similar
to the existing vmf_insert_pfn_prot() for use with drm/ttm.

I'd like to merge this through a drm tree.

Changes since v1:
*) Formatting fixes in patch 1
*) Updated commit message of patch 2.
Changes since v2:
*) Moved vmf_insert_mixed_prot() export to patch 2 (Michal Hocko)
*) Documented under which conditions it's safe to use a page protection
   different from struct vm_area_struct::vm_page_prot. (Michal Hocko)
Changes since v3:
*) More documentation regarding under which conditions it's safe to use a
   page protection different from struct vm_area_struct::vm_page_prot. This
   time also in core vm. (Michal Hocko)
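
For a flavor of the intended use, here is a minimal sketch distilled from
patch 2/2 above (error handling and the prefault loop omitted; bo, pfn and
address are as in the TTM fault path):

	pgprot_t prot = vmf->vma->vm_page_prot;

	/* Adjust caching- and encryption bits for the bo's current placement */
	prot = ttm_io_prot(bo->mem.placement, prot);
	if (bo->mem.bus.is_iomem)
		prot = pgprot_decrypted(prot);

	if (vmf->vma->vm_flags & VM_MIXEDMAP)
		ret = vmf_insert_mixed_prot(vmf->vma, address,
					    __pfn_to_pfn_t(pfn, PFN_DEV),
					    prot);
	else
		ret = vmf_insert_pfn_prot(vmf->vma, address, pfn, prot);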

Cc: Andrew Morton 
Cc: Michal Hocko 
Cc: "Matthew Wilcox (Oracle)" 
Cc: "Kirill A. Shutemov" 
Cc: Ralph Campbell 
Cc: "Jérôme Glisse" 
Cc: "Christian König" 

-- 
2.21.0



[PATCH v4 1/2] mm: Add a vmf_insert_mixed_prot() function

2019-12-12 Thread VMware
From: Thomas Hellstrom 

The TTM module today uses a hack to be able to set a different page
protection than struct vm_area_struct::vm_page_prot. To be able to do
this properly, add the needed vm functionality as vmf_insert_mixed_prot().

Cc: Andrew Morton 
Cc: Michal Hocko 
Cc: "Matthew Wilcox (Oracle)" 
Cc: "Kirill A. Shutemov" 
Cc: Ralph Campbell 
Cc: "Jérôme Glisse" 
Cc: "Christian König" 
Signed-off-by: Thomas Hellstrom 
Acked-by: Christian König 
---
 include/linux/mm.h   |  2 ++
 include/linux/mm_types.h |  7 ++-
 mm/memory.c  | 43 
 3 files changed, 47 insertions(+), 5 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index cc292273e6ba..29575d3c1e47 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2548,6 +2548,8 @@ vm_fault_t vmf_insert_pfn_prot(struct vm_area_struct 
*vma, unsigned long addr,
unsigned long pfn, pgprot_t pgprot);
 vm_fault_t vmf_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
pfn_t pfn);
+vm_fault_t vmf_insert_mixed_prot(struct vm_area_struct *vma, unsigned long 
addr,
+   pfn_t pfn, pgprot_t pgprot);
 vm_fault_t vmf_insert_mixed_mkwrite(struct vm_area_struct *vma,
unsigned long addr, pfn_t pfn);
 int vm_iomap_memory(struct vm_area_struct *vma, phys_addr_t start, unsigned 
long len);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index fa795284..ac96afdbb4bc 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -307,7 +307,12 @@ struct vm_area_struct {
/* Second cache line starts here. */
 
struct mm_struct *vm_mm;/* The address space we belong to. */
-   pgprot_t vm_page_prot;  /* Access permissions of this VMA. */
+
+   /*
+* Access permissions of this VMA.
+* See vmf_insert_mixed_prot() for discussion.
+*/
+   pgprot_t vm_page_prot;
unsigned long vm_flags; /* Flags, see mm.h. */
 
/*
diff --git a/mm/memory.c b/mm/memory.c
index b1ca51a079f2..269a8a871e83 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1646,6 +1646,9 @@ static vm_fault_t insert_pfn(struct vm_area_struct *vma, 
unsigned long addr,
  * vmf_insert_pfn_prot should only be used if using multiple VMAs is
  * impractical.
  *
+ * See vmf_insert_mixed_prot() for a discussion of the implications of using
+ * a value of @pgprot different from that of @vma->vm_page_prot.
+ *
  * Context: Process context.  May allocate using %GFP_KERNEL.
  * Return: vm_fault_t value.
  */
@@ -1719,9 +1722,9 @@ static bool vm_mixed_ok(struct vm_area_struct *vma, pfn_t 
pfn)
 }
 
 static vm_fault_t __vm_insert_mixed(struct vm_area_struct *vma,
-   unsigned long addr, pfn_t pfn, bool mkwrite)
+   unsigned long addr, pfn_t pfn, pgprot_t pgprot,
+   bool mkwrite)
 {
-   pgprot_t pgprot = vma->vm_page_prot;
int err;
 
BUG_ON(!vm_mixed_ok(vma, pfn));
@@ -1764,10 +1767,42 @@ static vm_fault_t __vm_insert_mixed(struct 
vm_area_struct *vma,
return VM_FAULT_NOPAGE;
 }
 
+/**
+ * vmf_insert_mixed_prot - insert single pfn into user vma with specified 
pgprot
+ * @vma: user vma to map to
+ * @addr: target user address of this page
+ * @pfn: source kernel pfn
+ * @pgprot: pgprot flags for the inserted page
+ *
+ * This is exactly like vmf_insert_mixed(), except that it allows drivers
+ * to override pgprot on a per-page basis.
+ *
+ * Typically this function should be used by drivers to set caching- and
+ * encryption bits different from those of @vma->vm_page_prot, because
+ * the caching- or encryption mode may not be known at mmap() time.
+ * This is ok as long as @vma->vm_page_prot is not used by the core vm
+ * to set caching and encryption bits for those vmas (except for COW pages).
+ * This is ensured by core vm only modifying these page table entries using
+ * functions that don't touch caching- or encryption bits, using pte_modify()
+ * if needed. (See for example mprotect()).
+ * Also when new page-table entries are created, this is only done using the
+ * fault() callback, and never using the value of vma->vm_page_prot,
+ * except for page-table entries that point to anonymous pages as the result
+ * of COW.
+ *
+ * Context: Process context.  May allocate using %GFP_KERNEL.
+ * Return: vm_fault_t value.
+ */
+vm_fault_t vmf_insert_mixed_prot(struct vm_area_struct *vma, unsigned long 
addr,
+pfn_t pfn, pgprot_t pgprot)
+{
+   return __vm_insert_mixed(vma, addr, pfn, pgprot, false);
+}
+
 vm_fault_t vmf_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
pfn_t pfn)
 {
-   return __vm_insert_mixed(vma, addr, pfn, false);
+   return __vm_insert_mixed(vma, addr, pfn, vma->vm_page_prot, false);
 }
 EXPORT_SYMBOL(vmf_insert_mixed);
 
@@ -1779,7 +1814,7 @@ EXPORT_SYMBOL(vmf_insert_mixed);
 vm_fault_t vmf_insert_mi

Ack to merge through DRM tree? WAS [PATCH v4 0/2] mm, drm/ttm: Fix pte insertion with customized protection

2019-12-20 Thread VMware

Andrew,

On 12/12/19 9:47 AM, Thomas Hellström (VMware) wrote:

From: Thomas Hellstrom 

The drm/ttm module is using a modified on-stack copy of the
struct vm_area_struct to be able to set a page protection with customized
caching. Fix that by adding a vmf_insert_mixed_prot() function similar
to the existing vmf_insert_pfn_prot() for use with drm/ttm.

I'd like to merge this through a drm tree.

Changes since v1:
*) Formatting fixes in patch 1
*) Updated commit message of patch 2.
Changes since v2:
*) Moved vmf_insert_mixed_prot() export to patch 2 (Michal Hocko)
*) Documented under which conditions it's safe to use a page protection
different from struct vm_area_struct::vm_page_prot. (Michal Hocko)
Changes since v3:
*) More documentation regarding under which conditions it's safe to use a
page protection different from struct vm_area_struct::vm_page_prot. This
time also in core vm. (Michal Hocko)

Cc: Andrew Morton 
Cc: Michal Hocko 
Cc: "Matthew Wilcox (Oracle)" 
Cc: "Kirill A. Shutemov" 
Cc: Ralph Campbell 
Cc: "Jérôme Glisse" 
Cc: "Christian König" 

Seems all concerns with this series have been addressed. Could I have an 
ack to merge this through a DRM tree?


Thanks,

Thomas





Re: [PATCH 2/3] drm/ttm: always keep BOs on the LRU

2019-10-18 Thread VMware

Hi, Christian,


On 10/16/19 11:30 AM, Christian König wrote:

Am 25.09.19 um 14:10 schrieb Christian König:

Am 25.09.19 um 14:06 schrieb Thomas Hellström (VMware):

On 9/25/19 12:55 PM, Christian König wrote:

This allows blocking for BOs to become available
in the memory management.

Amdgpu is doing this for quite a while now during CS. Now
apply the new behavior to all drivers using TTM.

Signed-off-by: Christian König 


Got to test this to see that there are no regressions.


Did you get time to test this, or did I just miss your response?

Thanks in advance,
Christian.


Sorry for the delay. A lot on my plate currently. I'll get around to
this later today or on Monday.


Thanks,

Thomas



Re: [PATCH 2/3] drm/ttm: always keep BOs on the LRU

2019-10-21 Thread VMware

On 10/18/19 3:49 PM, Thomas Hellström (VMware) wrote:

Hi, Christian,


On 10/16/19 11:30 AM, Christian König wrote:

Am 25.09.19 um 14:10 schrieb Christian König:

Am 25.09.19 um 14:06 schrieb Thomas Hellström (VMware):

On 9/25/19 12:55 PM, Christian König wrote:

This allows blocking for BOs to become available
in the memory management.

Amdgpu is doing this for quite a while now during CS. Now
apply the new behavior to all drivers using TTM.

Signed-off-by: Christian König 


Got to test this to see that there are no regressions.


Did you get time to test this, or did I just miss your response?

Thanks in advance,
Christian.


Sorry for the delay. A lot on my plate currently. I'll get around to
this later today or on Monday.


Thanks,

Thomas



Hi, Christian!

This patch is

Acked-by: Thomas Hellstrom 


Re: [PATCH 5/5] drm/amdgpu: implement amdgpu_gem_prime_move_notify v2

2020-02-18 Thread VMware

On 2/17/20 6:55 PM, Daniel Vetter wrote:

On Mon, Feb 17, 2020 at 04:45:09PM +0100, Christian König wrote:

Implement the importer side of unpinned DMA-buf handling.

v2: update page tables immediately

Signed-off-by: Christian König 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 66 -
  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c  |  6 ++
  2 files changed, 71 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
index 770baba621b3..48de7624d49c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -453,7 +453,71 @@ amdgpu_dma_buf_create_obj(struct drm_device *dev, struct 
dma_buf *dma_buf)
return ERR_PTR(ret);
  }
  
+/**

+ * amdgpu_dma_buf_move_notify - &attach.move_notify implementation
+ *
+ * @attach: the DMA-buf attachment
+ *
+ * Invalidate the DMA-buf attachment, making sure that we re-create the
+ * mapping before the next use.
+ */
+static void
+amdgpu_dma_buf_move_notify(struct dma_buf_attachment *attach)
+{
+   struct drm_gem_object *obj = attach->importer_priv;
+   struct ww_acquire_ctx *ticket = dma_resv_locking_ctx(obj->resv);
+   struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+   struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+   struct ttm_operation_ctx ctx = { false, false };
+   struct ttm_placement placement = {};
+   struct amdgpu_vm_bo_base *bo_base;
+   int r;
+
+   if (bo->tbo.mem.mem_type == TTM_PL_SYSTEM)
+   return;
+
+   r = ttm_bo_validate(&bo->tbo, &placement, &ctx);
+   if (r) {
+   DRM_ERROR("Failed to invalidate DMA-buf import (%d))\n", r);
+   return;
+   }
+
+   for (bo_base = bo->vm_bo; bo_base; bo_base = bo_base->next) {
+   struct amdgpu_vm *vm = bo_base->vm;
+   struct dma_resv *resv = vm->root.base.bo->tbo.base.resv;
+
+   if (ticket) {

Yeah so this is kinda why I've been a total pain about the exact semantics
of the move_notify hook. I think we should flat-out require that importers
_always_ have a ticket attached when they call this, and that they can cope
with additional locks being taken (i.e. full EDEADLK handling).

Simplest way to force that contract is to add a dummy 2nd ww_mutex lock to
the dma_resv object, which we then can take #ifdef
CONFIG_WW_MUTEX_SLOWPATH_DEBUG. Plus maybe a WARN_ON(!ticket).

Now the real disaster is how we handle deadlocks. Two issues:

- Ideally we'd keep any lock we've taken locked until the end, it helps
   avoid needless backoffs. I've played around a bit with that but not even poc
   level, just an idea:

https://cgit.freedesktop.org/~danvet/drm/commit/?id=b1799c5a0f02df9e1bb08d27be37331255ab7582

   Idea is essentially to track a list of objects we had to lock as part of
   the ttm_bo_validate of the main object.

- Second one is if we get an EDEADLK on one of these sublocks (like the
   one here). We need to pass that up the entire callchain, including a
   temporary reference (we have to drop locks to do the ww_mutex_lock_slow
   call), and need a custom callback to drop that temporary reference
   (since that's all driver specific, might even be internal ww_mutex and
   not anything remotely looking like a normal dma_buf). This probably
   needs the exec util helpers from ttm, but at the dma_resv level, so that
   we can do something like this:

struct dma_resv_ticket {
struct ww_acquire_ctx base;

/* can be set by anyone (including other drivers) that got hold of
 * this ticket and had to acquire some new lock. This lock might
 * protect anything, including driver-internal stuff, and isn't
 * required to be a dma_buf or even just a dma_resv. */
struct ww_mutex *contended_lock;

/* callback which the driver (which might be a dma-buf exporter
 * and not matching the driver that started this locking ticket)
 * sets together with @contended_lock, for the main driver to drop
 * when it calls dma_resv_unlock on the contended_lock. */
	void (*drop_ref)(struct ww_mutex *contended_lock);
};

This is all supremely nasty (also ttm_bo_validate would need to be
improved to handle these sublocks and random new objects that could force
a ww_mutex_lock_slow).


Just a short comment on this:

Neither the currently used wait-die nor the wound-wait algorithm
*strictly* requires a slow lock on the contended lock. For wait-die it's 
just very convenient since it makes us sleep instead of spinning with 
-EDEADLK on the contended lock. For wound-wait IIRC one could just 
immediately restart the whole locking transaction after an -EDEADLK, and 
the transaction would automatically end up waiting on the contended 
lock, provided the mutex lock stealing is not allowed. There is however 
a possibility that the transaction will be wounded again on another 
lock, taken
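
For reference, the ww_mutex contract under discussion, in a minimal sketch
of a transaction over two locks and its -EDEADLK backoff (standard
ww_mutex API; the full retry loop and error handling are elided, and
obj1/obj2/ww_class are placeholders):

	ww_acquire_init(&ctx, &ww_class);

	ret = ww_mutex_lock(&obj1->lock, &ctx);
	/* ... */
	ret = ww_mutex_lock(&obj2->lock, &ctx);
	if (ret == -EDEADLK) {
		/* We lost: back off, then sleep on the contended lock
		 * instead of spinning on -EDEADLK. */
		ww_mutex_unlock(&obj1->lock);
		ww_mutex_lock_slow(&obj2->lock, &ctx);
		/* ... restart the transaction with obj2->lock held ... */
	}
	ww_acquire_done(&ctx);
	/* ... critical section ... */
	ww_mutex_unlock(&obj1->lock);
	ww_mutex_unlock(&obj2->lock);
	ww_acquire_fini(&ctx);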

Re: [PATCH 5/5] drm/amdgpu: implement amdgpu_gem_prime_move_notify v2

2020-02-18 Thread VMware

On 2/18/20 10:01 PM, Daniel Vetter wrote:

On Tue, Feb 18, 2020 at 9:17 PM Thomas Hellström (VMware)
 wrote:

On 2/17/20 6:55 PM, Daniel Vetter wrote:

On Mon, Feb 17, 2020 at 04:45:09PM +0100, Christian König wrote:

Implement the importer side of unpinned DMA-buf handling.

v2: update page tables immediately

Signed-off-by: Christian König 
---
   drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 66 -
   drivers/gpu/drm/amd/amdgpu/amdgpu_object.c  |  6 ++
   2 files changed, 71 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
index 770baba621b3..48de7624d49c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -453,7 +453,71 @@ amdgpu_dma_buf_create_obj(struct drm_device *dev, struct 
dma_buf *dma_buf)
  return ERR_PTR(ret);
   }

+/**
+ * amdgpu_dma_buf_move_notify - &attach.move_notify implementation
+ *
+ * @attach: the DMA-buf attachment
+ *
+ * Invalidate the DMA-buf attachment, making sure that we re-create the
+ * mapping before the next use.
+ */
+static void
+amdgpu_dma_buf_move_notify(struct dma_buf_attachment *attach)
+{
+struct drm_gem_object *obj = attach->importer_priv;
+struct ww_acquire_ctx *ticket = dma_resv_locking_ctx(obj->resv);
+struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+struct ttm_operation_ctx ctx = { false, false };
+struct ttm_placement placement = {};
+struct amdgpu_vm_bo_base *bo_base;
+int r;
+
+if (bo->tbo.mem.mem_type == TTM_PL_SYSTEM)
+return;
+
+r = ttm_bo_validate(&bo->tbo, &placement, &ctx);
+if (r) {
+DRM_ERROR("Failed to invalidate DMA-buf import (%d))\n", r);
+return;
+}
+
+for (bo_base = bo->vm_bo; bo_base; bo_base = bo_base->next) {
+struct amdgpu_vm *vm = bo_base->vm;
+struct dma_resv *resv = vm->root.base.bo->tbo.base.resv;
+
+if (ticket) {

Yeah so this is kinda why I've been a total pain about the exact semantics
of the move_notify hook. I think we should flat-out require that importers
_always_ have a ticket attached when they call this, and that they can cope
with additional locks being taken (i.e. full EDEADLK handling).

Simplest way to force that contract is to add a dummy 2nd ww_mutex lock to
the dma_resv object, which we then can take #ifdef
CONFIG_WW_MUTEX_SLOWPATH_DEBUG. Plus maybe a WARN_ON(!ticket).

Now the real disaster is how we handle deadlocks. Two issues:

- Ideally we'd keep any lock we've taken locked until the end, it helps
avoid needless backoffs. I've played around a bit with that but not even poc
level, just an idea:

https://cgit.freedesktop.org/~danvet/drm/commit/?id=b1799c5a0f02df9e1bb08d27be37331255ab7582

Idea is essentially to track a list of objects we had to lock as part of
the ttm_bo_validate of the main object.

- Second one is if we get an EDEADLK on one of these sublocks (like the
one here). We need to pass that up the entire callchain, including a
temporary reference (we have to drop locks to do the ww_mutex_lock_slow
call), and need a custom callback to drop that temporary reference
(since that's all driver specific, might even be internal ww_mutex and
not anything remotely looking like a normal dma_buf). This probably
needs the exec util helpers from ttm, but at the dma_resv level, so that
we can do something like this:

struct dma_resv_ticket {
   struct ww_acquire_ctx base;

   /* can be set by anyone (including other drivers) that got hold of
* this ticket and had to acquire some new lock. This lock might
* protect anything, including driver-internal stuff, and isn't
* required to be a dma_buf or even just a dma_resv. */
   struct ww_mutex *contended_lock;

   /* callback which the driver (which might be a dma-buf exporter
* and not matching the driver that started this locking ticket)
* sets together with @contended_lock, for the main driver to drop
* when it calls dma_resv_unlock on the contended_lock. */
   void (*drop_ref)(struct ww_mutex *contended_lock);
};

This is all supremely nasty (also ttm_bo_validate would need to be
improved to handle these sublocks and random new objects that could force
a ww_mutex_lock_slow).


Just a short comment on this:

Neither the currently used wait-die nor the wound-wait algorithm
*strictly* requires a slow lock on the contended lock. For wait-die it's
just very convenient since it makes us sleep instead of spinning with
-EDEADLK on the contended lock. For wound-wait IIRC one could just
immediately restart the whole locking transaction after an -EDEADLK, and
the transaction would automatically end up waiting on the contended
lock, provided 

Re: [PATCH 5/5] drm/amdgpu: implement amdgpu_gem_prime_move_notify v2

2020-02-20 Thread VMware

On 2/19/20 7:42 AM, Thomas Hellström (VMware) wrote:

On 2/18/20 10:01 PM, Daniel Vetter wrote:

On Tue, Feb 18, 2020 at 9:17 PM Thomas Hellström (VMware)
 wrote:

On 2/17/20 6:55 PM, Daniel Vetter wrote:

On Mon, Feb 17, 2020 at 04:45:09PM +0100, Christian König wrote:

Implement the importer side of unpinned DMA-buf handling.

v2: update page tables immediately

Signed-off-by: Christian König 
---
   drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 66 -
   drivers/gpu/drm/amd/amdgpu/amdgpu_object.c  |  6 ++
   2 files changed, 71 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
index 770baba621b3..48de7624d49c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -453,7 +453,71 @@ amdgpu_dma_buf_create_obj(struct drm_device *dev, struct 
dma_buf *dma_buf)
 	return ERR_PTR(ret);
 }

+/**
+ * amdgpu_dma_buf_move_notify - &attach.move_notify implementation
+ *
+ * @attach: the DMA-buf attachment
+ *
+ * Invalidate the DMA-buf attachment, making sure that we re-create the
+ * mapping before the next use.
+ */
+static void
+amdgpu_dma_buf_move_notify(struct dma_buf_attachment *attach)
+{
+	struct drm_gem_object *obj = attach->importer_priv;
+	struct ww_acquire_ctx *ticket = dma_resv_locking_ctx(obj->resv);
+	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+	struct ttm_operation_ctx ctx = { false, false };
+	struct ttm_placement placement = {};
+	struct amdgpu_vm_bo_base *bo_base;
+	int r;
+
+	if (bo->tbo.mem.mem_type == TTM_PL_SYSTEM)
+		return;
+
+	r = ttm_bo_validate(&bo->tbo, &placement, &ctx);
+	if (r) {
+		DRM_ERROR("Failed to invalidate DMA-buf import (%d))\n", r);
+		return;
+	}
+
+	for (bo_base = bo->vm_bo; bo_base; bo_base = bo_base->next) {
+		struct amdgpu_vm *vm = bo_base->vm;
+		struct dma_resv *resv = vm->root.base.bo->tbo.base.resv;
+
+		if (ticket) {

Yeah so this is kinda why I've been a total pain about the exact semantics
of the move_notify hook. I think we should flat-out require that importers
_always_ have a ticket attached when they call this, and that they can cope
with additional locks being taken (i.e. full EDEADLK handling).

Simplest way to force that contract is to add a dummy 2nd ww_mutex lock to
the dma_resv object, which we then can take #ifdef
CONFIG_WW_MUTEX_SLOWPATH_DEBUG. Plus maybe a WARN_ON(!ticket).

Now the real disaster is how we handle deadlocks. Two issues:

- Ideally we'd keep any lock we've taken locked until the end, it helps
   avoid needless backoffs. I've played around a bit with that but not even
   poc level, just an idea:

https://cgit.freedesktop.org/~danvet/drm/commit/?id=b1799c5a0f02df9e1bb08d27be37331255ab7582

   Idea is essentially to track a list of objects we had to lock as part of
   the ttm_bo_validate of the main object.

- Second one is if we get an EDEADLK on one of these sublocks (like the
   one here). We need to pass that up the entire callchain, including a
   temporary reference (we have to drop locks to do the ww_mutex_lock_slow
   call), and need a custom callback to drop that temporary reference
   (since that's all driver specific, might even be internal ww_mutex and
   not anything remotely looking like a normal dma_buf). This probably
   needs the exec util helpers from ttm, but at the dma_resv level, so that
   we can do something like this:

struct dma_resv_ticket {
	struct ww_acquire_ctx base;

	/* can be set by anyone (including other drivers) that got hold of
	 * this ticket and had to acquire some new lock. This lock might
	 * protect anything, including driver-internal stuff, and isn't
	 * required to be a dma_buf or even just a dma_resv. */
	struct ww_mutex *contended_lock;

	/* callback which the driver (which might be a dma-buf exporter
	 * and not matching the driver that started this locking ticket)
	 * sets together with @contended_lock, for the main driver to drop
	 * when it calls dma_resv_unlock on the contended_lock. */
	void (*drop_ref)(struct ww_mutex *contended_lock);
};

This is all supremely nasty (also ttm_bo_validate would need to be
improved to handle these sublocks and random new objects that could force
a ww_mutex_lock_slow).


Just a short comment on this:

Neither the currently used wait-die nor the wound-wait algorithm
*strictly* requires a slow lock on the contended lock. For wait-die it's
just very convenient since it makes us sleep instead of spinning with
-EDEADLK on the contended lock. For wound-wait IIRC one could just
immediately restart the whole locking transaction after

[PATCH v4 2/9] mm: Introduce vma_is_special_huge

2020-02-20 Thread VMware
From: Thomas Hellstrom 

For VM_PFNMAP and VM_MIXEDMAP vmas that want to support transhuge pages
and transhuge page-table entries, introduce vma_is_special_huge() that takes the
same codepaths as vma_is_dax().

The use of "special" follows the definition in memory.c, vm_normal_page():
"Special" mappings do not wish to be associated with a "struct page"
(either it doesn't exist, or it exists but they don't want to touch it)

For PAGE_SIZE pages, "special" is determined per page table entry to be
able to deal with COW pages. But since we don't have huge COW pages,
we can classify a vma as either "special huge" or "normal huge".
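
As a concrete illustration (hypothetical driver mmap code, not part of the
patch): a TTM-style mmap callback sets vm_file plus VM_PFNMAP or
VM_MIXEDMAP on the vma, so the helper below classifies it "special huge"
even though it is not a DAX vma, while an anonymous vma tests false:

	/* In a driver's ->mmap(): vma->vm_file was already set by the mmap
	 * machinery, so after this vma_is_special_huge(vma) returns true. */
	vma->vm_flags |= VM_PFNMAP | VM_IO | VM_DONTEXPAND | VM_DONTDUMP;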

Cc: Andrew Morton 
Cc: Michal Hocko 
Cc: "Matthew Wilcox (Oracle)" 
Cc: "Kirill A. Shutemov" 
Cc: Ralph Campbell 
Cc: "Jérôme Glisse" 
Cc: "Christian König" 
Cc: Dan Williams 
Signed-off-by: Thomas Hellstrom 
Acked-by: Christian König 
---
 include/linux/mm.h | 17 +
 mm/huge_memory.c   |  6 +++---
 2 files changed, 20 insertions(+), 3 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 0157d293935f..d370ce2932a1 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2822,6 +2822,23 @@ extern long copy_huge_page_from_user(struct page 
*dst_page,
const void __user *usr_src,
unsigned int pages_per_huge_page,
bool allow_pagefault);
+
+/**
+ * vma_is_special_huge - Are transhuge page-table entries considered special?
+ * @vma: Pointer to the struct vm_area_struct to consider
+ *
+ * Whether transhuge page-table entries are considered "special" following
+ * the definition in vm_normal_page().
+ *
+ * Return: true if transhuge page-table entries should be considered special,
+ * false otherwise.
+ */
+static inline bool vma_is_special_huge(const struct vm_area_struct *vma)
+{
+   return vma_is_dax(vma) || (vma->vm_file &&
+  (vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP)));
+}
+
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */
 
 #ifdef CONFIG_DEBUG_PAGEALLOC
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 41a0fbddc96b..f8d24fc3f4df 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1789,7 +1789,7 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct 
vm_area_struct *vma,
orig_pmd = pmdp_huge_get_and_clear_full(tlb->mm, addr, pmd,
tlb->fullmm);
tlb_remove_pmd_tlb_entry(tlb, pmd, addr);
-   if (vma_is_dax(vma)) {
+   if (vma_is_special_huge(vma)) {
if (arch_needs_pgtable_deposit())
zap_deposited_table(tlb->mm, pmd);
spin_unlock(ptl);
@@ -2053,7 +2053,7 @@ int zap_huge_pud(struct mmu_gather *tlb, struct 
vm_area_struct *vma,
 */
pudp_huge_get_and_clear_full(tlb->mm, addr, pud, tlb->fullmm);
tlb_remove_pud_tlb_entry(tlb, pud, addr);
-   if (vma_is_dax(vma)) {
+   if (vma_is_special_huge(vma)) {
spin_unlock(ptl);
/* No zero page support yet */
} else {
@@ -2162,7 +2162,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct 
*vma, pmd_t *pmd,
 */
if (arch_needs_pgtable_deposit())
zap_deposited_table(mm, pmd);
-   if (vma_is_dax(vma))
+   if (vma_is_special_huge(vma))
return;
page = pmd_page(_pmd);
if (!PageDirty(page) && pmd_dirty(_pmd))
-- 
2.21.1



[PATCH v4 6/9] drm/vmwgfx: Support huge page faults

2020-02-20 Thread VMware
From: Thomas Hellstrom 

With vmwgfx dirty-tracking we need a specialized huge_fault
callback. Implement and hook it up.

Cc: Andrew Morton 
Cc: Michal Hocko 
Cc: "Matthew Wilcox (Oracle)" 
Cc: "Kirill A. Shutemov" 
Cc: Ralph Campbell 
Cc: "Jérôme Glisse" 
Cc: "Christian König" 
Cc: Dan Williams 
Signed-off-by: Thomas Hellstrom 
Reviewed-by: Roland Scheidegger 
Acked-by: Christian König 
---
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.h|  4 ++
 drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c | 74 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c   |  5 +-
 3 files changed, 81 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h 
b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index a31e726d6d71..82d86f2d2569 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -1428,6 +1428,10 @@ void vmw_bo_dirty_unmap(struct vmw_buffer_object *vbo,
pgoff_t start, pgoff_t end);
 vm_fault_t vmw_bo_vm_fault(struct vm_fault *vmf);
 vm_fault_t vmw_bo_vm_mkwrite(struct vm_fault *vmf);
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+vm_fault_t vmw_bo_vm_huge_fault(struct vm_fault *vmf,
+   enum page_entry_size pe_size);
+#endif
 
 /**
  * VMW_DEBUG_KMS - Debug output for kernel mode-setting
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c
index 17a5dca7b921..cde3e07ebaf7 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c
@@ -473,7 +473,7 @@ vm_fault_t vmw_bo_vm_fault(struct vm_fault *vmf)
 * a lot of unnecessary write faults.
 */
if (vbo->dirty && vbo->dirty->method == VMW_BO_DIRTY_MKWRITE)
-   prot = vma->vm_page_prot;
+   prot = vm_get_page_prot(vma->vm_flags & ~VM_SHARED);
else
prot = vm_get_page_prot(vma->vm_flags);
 
@@ -486,3 +486,75 @@ vm_fault_t vmw_bo_vm_fault(struct vm_fault *vmf)
 
return ret;
 }
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+vm_fault_t vmw_bo_vm_huge_fault(struct vm_fault *vmf,
+   enum page_entry_size pe_size)
+{
+   struct vm_area_struct *vma = vmf->vma;
+   struct ttm_buffer_object *bo = (struct ttm_buffer_object *)
+   vma->vm_private_data;
+   struct vmw_buffer_object *vbo =
+   container_of(bo, struct vmw_buffer_object, base);
+   pgprot_t prot;
+   vm_fault_t ret;
+   pgoff_t fault_page_size;
+   bool write = vmf->flags & FAULT_FLAG_WRITE;
+   bool is_cow_mapping =
+   (vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
+
+   switch (pe_size) {
+   case PE_SIZE_PMD:
+   fault_page_size = HPAGE_PMD_SIZE >> PAGE_SHIFT;
+   break;
+#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
+   case PE_SIZE_PUD:
+   fault_page_size = HPAGE_PUD_SIZE >> PAGE_SHIFT;
+   break;
+#endif
+   default:
+   WARN_ON_ONCE(1);
+   return VM_FAULT_FALLBACK;
+   }
+
+   /* Always do write dirty-tracking and COW on PTE level. */
+   if (write && (READ_ONCE(vbo->dirty) || is_cow_mapping))
+   return VM_FAULT_FALLBACK;
+
+   ret = ttm_bo_vm_reserve(bo, vmf);
+   if (ret)
+   return ret;
+
+   if (vbo->dirty) {
+   pgoff_t allowed_prefault;
+   unsigned long page_offset;
+
+   page_offset = vmf->pgoff -
+   drm_vma_node_start(&bo->base.vma_node);
+   if (page_offset >= bo->num_pages ||
+   vmw_resources_clean(vbo, page_offset,
+   page_offset + PAGE_SIZE,
+   &allowed_prefault)) {
+   ret = VM_FAULT_SIGBUS;
+   goto out_unlock;
+   }
+
+   /*
+* Write protect, so we get a new fault on write, and can
+* split.
+*/
+   prot = vm_get_page_prot(vma->vm_flags & ~VM_SHARED);
+   } else {
+   prot = vm_get_page_prot(vma->vm_flags);
+   }
+
+   ret = ttm_bo_vm_fault_reserved(vmf, prot, 1, fault_page_size);
+   if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
+   return ret;
+
+out_unlock:
+   dma_resv_unlock(bo->base.resv);
+
+   return ret;
+}
+#endif
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c
index ce288756531b..34100d1f5a9d 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c
@@ -34,7 +34,10 @@ int vmw_mmap(struct file *filp, struct vm_area_struct *vma)
.page_mkwrite = vmw_bo_vm_mkwrite,
.fault = vmw_bo_vm_fault,
.open = ttm_bo_vm_open,
-   .close = ttm_bo_vm_close
+   .close = ttm_bo_vm_close,
+#ifdef
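
(The archive truncates the final hunk here. Given the vmw_bo_vm_huge_fault()
callback declared in vmwgfx_drv.h above, the remainder presumably just wires
it into the vm_ops, along these lines; a reconstruction, not the verbatim
patch:)

+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	.huge_fault = vmw_bo_vm_huge_fault,
+#endif
 	};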

[PATCH v4 1/9] fs: Constify vma argument to vma_is_dax

2020-02-20 Thread VMware
From: Thomas Hellstrom 

The vma argument is only dereferenced for reading.

Cc: Andrew Morton 
Cc: Michal Hocko 
Cc: "Matthew Wilcox (Oracle)" 
Cc: "Kirill A. Shutemov" 
Cc: Ralph Campbell 
Cc: "Jérôme Glisse" 
Cc: "Christian König" 
Cc: Dan Williams 
Signed-off-by: Thomas Hellstrom 
Reviewed-by: Roland Scheidegger 
Acked-by: Christian König 
---
 include/linux/fs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 98e0349adb52..4f41fdbf402f 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3367,7 +3367,7 @@ static inline bool io_is_direct(struct file *filp)
return (filp->f_flags & O_DIRECT) || IS_DAX(filp->f_mapping->host);
 }
 
-static inline bool vma_is_dax(struct vm_area_struct *vma)
+static inline bool vma_is_dax(const struct vm_area_struct *vma)
 {
return vma->vm_file && IS_DAX(vma->vm_file->f_mapping->host);
 }
-- 
2.21.1



[PATCH v4 7/9] drm: Add a drm_get_unmapped_area() helper

2020-02-20 Thread VMware
From: Thomas Hellstrom 

Unaligned virtual addresses make it unlikely that huge page-table entries
can be used.
So align virtual buffer-object addresses to huge page boundaries matching
the underlying physical-address huge page boundaries, taking buffer-object
sizes into account to determine when it might be possible to use huge
page-table entries.
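
For intuition, a worked example of the inflate-and-align trick implemented
in drm_addr_inflate() below (numbers illustrative, assuming 2 MiB huge
pages):

	/* huge_size = 2 MiB, len = 6 MiB, and the mapped range starts
	 * 512 KiB past a physical huge page boundary (offset = 512 KiB).
	 *
	 * 1. Over-allocate: inflated_len = len + huge_size - PAGE_SIZE,
	 *    so some address with the right offset is guaranteed to fit.
	 * 2. Let the mm pick a start address for inflated_len.
	 * 3. Advance to the first address whose low 21 bits equal the
	 *    512 KiB offset. Virtual and physical addresses now share
	 *    their offset relative to huge page boundaries, so the bulk
	 *    of the mapping can use PMD-size entries.
	 */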

Cc: Andrew Morton 
Cc: Michal Hocko 
Cc: "Matthew Wilcox (Oracle)" 
Cc: "Kirill A. Shutemov" 
Cc: Ralph Campbell 
Cc: "Jérôme Glisse" 
Cc: "Christian König" 
Cc: Dan Williams 
Signed-off-by: Thomas Hellstrom 
Reviewed-by: Roland Scheidegger 
Acked-by: Christian König 
---
 drivers/gpu/drm/drm_file.c | 136 +
 include/drm/drm_file.h |   5 ++
 2 files changed, 141 insertions(+)

diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c
index 92d16724f949..40fae356d202 100644
--- a/drivers/gpu/drm/drm_file.c
+++ b/drivers/gpu/drm/drm_file.c
@@ -39,10 +39,13 @@
 #include 
 #include 
 
+#include 
+
 #include 
 #include 
 #include 
 #include 
+#include 
 
 #include "drm_crtc_internal.h"
 #include "drm_internal.h"
@@ -796,3 +799,136 @@ struct file *mock_drm_getfile(struct drm_minor *minor, 
unsigned int flags)
return file;
 }
 EXPORT_SYMBOL_FOR_TESTS_ONLY(mock_drm_getfile);
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+/*
+ * drm_addr_inflate() attempts to construct an aligned area by inflating
+ * the area size and skipping the unaligned start of the area.
+ * adapted from shmem_get_unmapped_area()
+ */
+static unsigned long drm_addr_inflate(unsigned long addr,
+ unsigned long len,
+ unsigned long pgoff,
+ unsigned long flags,
+ unsigned long huge_size)
+{
+   unsigned long offset, inflated_len;
+   unsigned long inflated_addr;
+   unsigned long inflated_offset;
+
+   offset = (pgoff << PAGE_SHIFT) & (huge_size - 1);
+   if (offset && offset + len < 2 * huge_size)
+   return addr;
+   if ((addr & (huge_size - 1)) == offset)
+   return addr;
+
+   inflated_len = len + huge_size - PAGE_SIZE;
+   if (inflated_len > TASK_SIZE)
+   return addr;
+   if (inflated_len < len)
+   return addr;
+
+   inflated_addr = current->mm->get_unmapped_area(NULL, 0, inflated_len,
+  0, flags);
+   if (IS_ERR_VALUE(inflated_addr))
+   return addr;
+   if (inflated_addr & ~PAGE_MASK)
+   return addr;
+
+   inflated_offset = inflated_addr & (huge_size - 1);
+   inflated_addr += offset - inflated_offset;
+   if (inflated_offset > offset)
+   inflated_addr += huge_size;
+
+   if (inflated_addr > TASK_SIZE - len)
+   return addr;
+
+   return inflated_addr;
+}
+
+/**
+ * drm_get_unmapped_area() - Get an unused user-space virtual memory area
+ * suitable for huge page table entries.
+ * @file: The struct file representing the address space being mmap()'d.
+ * @uaddr: Start address suggested by user-space.
+ * @len: Length of the area.
+ * @pgoff: The page offset into the address space.
+ * @flags: mmap flags
+ * @mgr: The address space manager used by the drm driver. This argument can
+ * probably be removed at some point when all drivers use the same
+ * address space manager.
+ *
+ * This function attempts to find an unused user-space virtual memory area
+ * that can accommodate the size we want to map, and that is properly
+ * aligned to facilitate huge page table entries matching actual
+ * huge pages or huge page aligned memory in buffer objects. Buffer objects
+ * are assumed to start at huge page boundary pfns (io memory) or be
+ * populated by huge pages aligned to the start of the buffer object
+ * (system- or coherent memory). Adapted from shmem_get_unmapped_area.
+ *
+ * Return: aligned user-space address.
+ */
+unsigned long drm_get_unmapped_area(struct file *file,
+   unsigned long uaddr, unsigned long len,
+   unsigned long pgoff, unsigned long flags,
+   struct drm_vma_offset_manager *mgr)
+{
+   unsigned long addr;
+   unsigned long inflated_addr;
+   struct drm_vma_offset_node *node;
+
+   if (len > TASK_SIZE)
+   return -ENOMEM;
+
+   /*
+* @pgoff is the file page-offset the huge page boundaries of
+* which typically aligns to physical address huge page boundaries.
+* That's not true for DRM, however, where physical address huge
+* page boundaries instead are aligned with the offset from
+* buffer object start. So adjust @pgoff to be the offset from
+* buffer object start.
+*/
+   drm_vma_offset_lock_lookup(mgr);
+   node = drm_vma_offset_lookup_locked(mgr, pgoff, 1);
+   if (node)
+  
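
(Truncated here. The gist of the remainder, sketched rather than quoted
verbatim: make @pgoff relative to the buffer-object start, ask the mm for
a candidate area, then run it through drm_addr_inflate() for PMD size and,
where the arch supports huge PUDs, PUD size:)

+		pgoff -= node->vm_node.start;
+	drm_vma_offset_unlock_lookup(mgr);
+
+	addr = current->mm->get_unmapped_area(file, uaddr, len, pgoff, flags);
+	if (IS_ERR_VALUE(addr))
+		return addr;
+
+	inflated_addr = drm_addr_inflate(addr, len, pgoff, flags,
+					 HPAGE_PMD_SIZE);
+	if (IS_ENABLED(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD) &&
+	    len >= HPAGE_PUD_SIZE)
+		inflated_addr = drm_addr_inflate(inflated_addr, len, pgoff,
+						 flags, HPAGE_PUD_SIZE);
+	return inflated_addr;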

[PATCH v4 9/9] drm/vmwgfx: Hook up the helpers to align buffer objects

2020-02-20 Thread VMware
From: Thomas Hellstrom 

Start using the helpers that align buffer object user-space addresses and
buffer object vram addresses to huge page boundaries.
This is to improve the chances of allowing huge page-table entries.

Cc: Andrew Morton 
Cc: Michal Hocko 
Cc: "Matthew Wilcox (Oracle)" 
Cc: "Kirill A. Shutemov" 
Cc: Ralph Campbell 
Cc: "Jérôme Glisse" 
Cc: "Christian König" 
Cc: Dan Williams 
Signed-off-by: Thomas Hellstrom 
Reviewed-by: Roland Scheidegger 
Acked-by: Christian König 
---
 drivers/gpu/drm/drm_file.c |  1 +
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.c| 13 +
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.h|  1 +
 drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c |  2 +-
 4 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c
index 40fae356d202..1df2fca608c3 100644
--- a/drivers/gpu/drm/drm_file.c
+++ b/drivers/gpu/drm/drm_file.c
@@ -932,3 +932,4 @@ unsigned long drm_get_unmapped_area(struct file *file,
return current->mm->get_unmapped_area(file, uaddr, len, pgoff, flags);
 }
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+EXPORT_SYMBOL_GPL(drm_get_unmapped_area);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index e962048f65d2..5452cabb4a2e 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -1215,6 +1215,18 @@ static void vmw_remove(struct pci_dev *pdev)
drm_put_dev(dev);
 }
 
+static unsigned long
+vmw_get_unmapped_area(struct file *file, unsigned long uaddr,
+ unsigned long len, unsigned long pgoff,
+ unsigned long flags)
+{
+   struct drm_file *file_priv = file->private_data;
+   struct vmw_private *dev_priv = vmw_priv(file_priv->minor->dev);
+
+   return drm_get_unmapped_area(file, uaddr, len, pgoff, flags,
+&dev_priv->vma_manager);
+}
+
 static int vmwgfx_pm_notifier(struct notifier_block *nb, unsigned long val,
  void *ptr)
 {
@@ -1386,6 +1398,7 @@ static const struct file_operations vmwgfx_driver_fops = {
.compat_ioctl = vmw_compat_ioctl,
 #endif
.llseek = noop_llseek,
+   .get_unmapped_area = vmw_get_unmapped_area,
 };
 
 static struct drm_driver driver = {
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h 
b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index 06267184aa0a..9ea145cffa3d 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -929,6 +929,7 @@ extern int vmw_mmap(struct file *filp, struct 
vm_area_struct *vma);
 
 extern void vmw_validation_mem_init_ttm(struct vmw_private *dev_priv,
size_t gran);
+
 /**
  * TTM buffer object driver - vmwgfx_ttm_buffer.c
  */
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
index d8ea3dd10af0..34c721ab3ff3 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
@@ -754,7 +754,7 @@ static int vmw_init_mem_type(struct ttm_bo_device *bdev, 
uint32_t type,
break;
case TTM_PL_VRAM:
/* "On-card" video ram */
-   man->func = &ttm_bo_manager_func;
+   man->func = &vmw_thp_func;
man->gpu_offset = 0;
man->flags = TTM_MEMTYPE_FLAG_FIXED | TTM_MEMTYPE_FLAG_MAPPABLE;
man->available_caching = TTM_PL_FLAG_CACHED;
-- 
2.21.1



[PATCH v4 8/9] drm/vmwgfx: Introduce a huge page aligning TTM range manager

2020-02-20 Thread VMware
From: Thomas Hellstrom 

Using huge page-table entries requires that the physical address of the
start of a buffer object is huge page size aligned.
Make a special version of the TTM range manager that accomplishes this,
but falls back to a smaller page size alignment (PUD->PMD, PMD->NORMAL)
to avoid eviction.
If other drivers want to use it in the future, it can be made a
TTM generic helper. Note that drivers can force eviction for a certain
alignment by assigning the TTM GPU alignment correspondingly.

Cc: Andrew Morton 
Cc: Michal Hocko 
Cc: "Matthew Wilcox (Oracle)" 
Cc: "Kirill A. Shutemov" 
Cc: Ralph Campbell 
Cc: "Jérôme Glisse" 
Cc: "Christian König" 
Cc: Dan Williams 
Signed-off-by: Thomas Hellstrom 
Reviewed-by: Roland Scheidegger 
Acked-by: Christian König 
---
 drivers/gpu/drm/vmwgfx/Makefile |   1 +
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.h |   7 ++
 drivers/gpu/drm/vmwgfx/vmwgfx_thp.c | 166 
 3 files changed, 174 insertions(+)
 create mode 100644 drivers/gpu/drm/vmwgfx/vmwgfx_thp.c

diff --git a/drivers/gpu/drm/vmwgfx/Makefile b/drivers/gpu/drm/vmwgfx/Makefile
index c877a21a0739..421dd2a497a5 100644
--- a/drivers/gpu/drm/vmwgfx/Makefile
+++ b/drivers/gpu/drm/vmwgfx/Makefile
@@ -11,4 +11,5 @@ vmwgfx-y := vmwgfx_execbuf.o vmwgfx_gmr.o vmwgfx_kms.o 
vmwgfx_drv.o \
vmwgfx_validation.o vmwgfx_page_dirty.o \
ttm_object.o ttm_lock.o
 
+vmwgfx-$(CONFIG_TRANSPARENT_HUGEPAGE) += vmwgfx_thp.o
 obj-$(CONFIG_DRM_VMWGFX) := vmwgfx.o
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h 
b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index 82d86f2d2569..06267184aa0a 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -1433,6 +1433,13 @@ vm_fault_t vmw_bo_vm_huge_fault(struct vm_fault *vmf,
enum page_entry_size pe_size);
 #endif
 
+/* Transparent hugepage support - vmwgfx_thp.c */
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+extern const struct ttm_mem_type_manager_func vmw_thp_func;
+#else
+#define vmw_thp_func ttm_bo_manager_func
+#endif
+
 /**
  * VMW_DEBUG_KMS - Debug output for kernel mode-setting
  *
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_thp.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_thp.c
new file mode 100644
index ..b7c816ba7166
--- /dev/null
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_thp.c
@@ -0,0 +1,166 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Huge page-table-entry support for IO memory.
+ *
+ * Copyright (C) 2007-2019 VMware, Inc. All rights reserved.
+ */
+#include "vmwgfx_drv.h"
+#include 
+#include 
+#include 
+
+/**
+ * struct vmw_thp_manager - Range manager implementing huge page alignment
+ *
+ * @mm: The underlying range manager. Protected by @lock.
+ * @lock: Manager lock.
+ */
+struct vmw_thp_manager {
+   struct drm_mm mm;
+   spinlock_t lock;
+};
+
+static int vmw_thp_insert_aligned(struct drm_mm *mm, struct drm_mm_node *node,
+ unsigned long align_pages,
+ const struct ttm_place *place,
+ struct ttm_mem_reg *mem,
+ unsigned long lpfn,
+ enum drm_mm_insert_mode mode)
+{
+   if (align_pages >= mem->page_alignment &&
+   (!mem->page_alignment || align_pages % mem->page_alignment == 0)) {
+   return drm_mm_insert_node_in_range(mm, node,
+  mem->num_pages,
+  align_pages, 0,
+  place->fpfn, lpfn, mode);
+   }
+
+   return -ENOSPC;
+}
+
+static int vmw_thp_get_node(struct ttm_mem_type_manager *man,
+   struct ttm_buffer_object *bo,
+   const struct ttm_place *place,
+   struct ttm_mem_reg *mem)
+{
+   struct vmw_thp_manager *rman = (struct vmw_thp_manager *) man->priv;
+   struct drm_mm *mm = &rman->mm;
+   struct drm_mm_node *node;
+   unsigned long align_pages;
+   unsigned long lpfn;
+   enum drm_mm_insert_mode mode = DRM_MM_INSERT_BEST;
+   int ret;
+
+   node = kzalloc(sizeof(*node), GFP_KERNEL);
+   if (!node)
+   return -ENOMEM;
+
+   lpfn = place->lpfn;
+   if (!lpfn)
+   lpfn = man->size;
+
+   mode = DRM_MM_INSERT_BEST;
+   if (place->flags & TTM_PL_FLAG_TOPDOWN)
+   mode = DRM_MM_INSERT_HIGH;
+
+   spin_lock(&rman->lock);
+   if (IS_ENABLED(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)) {
+   align_pages = (HPAGE_PUD_SIZE >> PAGE_SHIFT);
+   if (mem->num_pages >= align_pages) {
+   ret = vmw_thp_insert_aligned(mm, node, align_pages,
+
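
(Truncated here. The fallback cascade presumably completes along these
lines, following the PUD -> PMD -> normal scheme in the commit message; a
reconstruction, not the verbatim patch:)

+						     place, mem, lpfn, mode);
+			if (!ret)
+				goto found_unlock;
+		}
+	}
+
+	align_pages = (HPAGE_PMD_SIZE >> PAGE_SHIFT);
+	if (mem->num_pages >= align_pages) {
+		ret = vmw_thp_insert_aligned(mm, node, align_pages, place,
+					     mem, lpfn, mode);
+		if (!ret)
+			goto found_unlock;
+	}
+
+	/* Fall back to the GPU-mandated page alignment. */
+	ret = drm_mm_insert_node_in_range(mm, node, mem->num_pages,
+					  mem->page_alignment, 0,
+					  place->fpfn, lpfn, mode);
+found_unlock:
+	spin_unlock(&rman->lock);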

[PATCH v4 3/9] mm: Split huge pages on write-notify or COW

2020-02-20 Thread VMware
From: Thomas Hellstrom 

We currently only do COW and write-notify on the PTE level, so if the
huge_fault() handler returns VM_FAULT_FALLBACK on wp faults,
split the huge pages and page-table entries. Also do this for huge PUDs
if there is no huge_fault() handler and the vma is not anonymous, similar
to how it's done for PMDs.

Note that fs/dax.c does the splitting in the huge_fault() handler, but as
huge_fault() is implemented by modules we need to consider whether to
export the splitting functions for use in the modules or whether to try
to keep calls in the core. Opt for the latter. A follow-up patch can
remove the dax.c split_huge_pmd() if needed.

Cc: Andrew Morton 
Cc: Michal Hocko 
Cc: "Matthew Wilcox (Oracle)" 
Cc: "Kirill A. Shutemov" 
Cc: Ralph Campbell 
Cc: "Jérôme Glisse" 
Cc: "Christian König" 
Cc: Dan Williams 
Signed-off-by: Thomas Hellstrom 
Acked-by: Christian König 
---
 mm/memory.c | 27 +++
 1 file changed, 19 insertions(+), 8 deletions(-)

diff --git a/mm/memory.c b/mm/memory.c
index 17aadc751e5c..4c49fe963e5c 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3880,11 +3880,14 @@ static inline vm_fault_t wp_huge_pmd(struct vm_fault 
*vmf, pmd_t orig_pmd)
 {
if (vma_is_anonymous(vmf->vma))
return do_huge_pmd_wp_page(vmf, orig_pmd);
-   if (vmf->vma->vm_ops->huge_fault)
-   return vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PMD);
+   if (vmf->vma->vm_ops->huge_fault) {
+   vm_fault_t ret = vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PMD);
 
-   /* COW handled on pte level: split pmd */
-   VM_BUG_ON_VMA(vmf->vma->vm_flags & VM_SHARED, vmf->vma);
+   if (!(ret & VM_FAULT_FALLBACK))
+   return ret;
+   }
+
+   /* COW or write-notify handled on pte level: split pmd. */
__split_huge_pmd(vmf->vma, vmf->pmd, vmf->address, false, NULL);
 
return VM_FAULT_FALLBACK;
@@ -3897,12 +3900,20 @@ static inline bool vma_is_accessible(struct 
vm_area_struct *vma)
 
 static vm_fault_t create_huge_pud(struct vm_fault *vmf)
 {
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) &&\
+   defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)
/* No support for anonymous transparent PUD pages yet */
if (vma_is_anonymous(vmf->vma))
-   return VM_FAULT_FALLBACK;
-   if (vmf->vma->vm_ops->huge_fault)
-   return vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PUD);
+   goto split;
+   if (vmf->vma->vm_ops->huge_fault) {
+   vm_fault_t ret = vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PUD);
+
+   if (!(ret & VM_FAULT_FALLBACK))
+   return ret;
+   }
+split:
+   /* COW or write-notify not handled on PUD level: split pud.*/
+   __split_huge_pud(vmf->vma, vmf->pud, vmf->address);
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
return VM_FAULT_FALLBACK;
 }
-- 
2.21.1



[PATCH v4 4/9] mm: Add vmf_insert_pfn_xxx_prot() for huge page-table entries

2020-02-20 Thread VMware
From: Thomas Hellstrom 

For graphics drivers needing to modify the page-protection, add
huge page-table entries counterparts to vmf_insert_pfn_prot().
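
A sketch of the intended call pattern (hypothetical driver code; my_pfn
stands in for whatever pfn the driver resolved for the fault, and
pgprot_writecombine() for the driver-chosen protection):

	static vm_fault_t my_huge_fault(struct vm_fault *vmf,
					enum page_entry_size pe_size)
	{
		pgprot_t prot = pgprot_writecombine(vmf->vma->vm_page_prot);
		pfn_t pfnt = __pfn_to_pfn_t(my_pfn, PFN_DEV);

		if (pe_size != PE_SIZE_PMD)
			return VM_FAULT_FALLBACK;
		return vmf_insert_pfn_pmd_prot(vmf, pfnt, prot,
					       vmf->flags & FAULT_FLAG_WRITE);
	}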

Cc: Andrew Morton 
Cc: Michal Hocko 
Cc: "Matthew Wilcox (Oracle)" 
Cc: "Kirill A. Shutemov" 
Cc: Ralph Campbell 
Cc: "Jérôme Glisse" 
Cc: "Christian König" 
Cc: Dan Williams 
Signed-off-by: Thomas Hellstrom 
Acked-by: Christian König 
---
 include/linux/huge_mm.h | 41 +++--
 mm/huge_memory.c| 38 --
 2 files changed, 71 insertions(+), 8 deletions(-)

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 0b84e13e88e2..a95d1bc8ffe8 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -47,8 +47,45 @@ extern bool move_huge_pmd(struct vm_area_struct *vma, 
unsigned long old_addr,
 extern int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
unsigned long addr, pgprot_t newprot,
int prot_numa);
-vm_fault_t vmf_insert_pfn_pmd(struct vm_fault *vmf, pfn_t pfn, bool write);
-vm_fault_t vmf_insert_pfn_pud(struct vm_fault *vmf, pfn_t pfn, bool write);
+vm_fault_t vmf_insert_pfn_pmd_prot(struct vm_fault *vmf, pfn_t pfn,
+  pgprot_t pgprot, bool write);
+
+/**
+ * vmf_insert_pfn_pmd - insert a pmd size pfn
+ * @vmf: Structure describing the fault
+ * @pfn: pfn to insert
+ * @pgprot: page protection to use
+ * @write: whether it's a write fault
+ *
+ * Insert a pmd size pfn. See vmf_insert_pfn() for additional info.
+ *
+ * Return: vm_fault_t value.
+ */
+static inline vm_fault_t vmf_insert_pfn_pmd(struct vm_fault *vmf, pfn_t pfn,
+   bool write)
+{
+   return vmf_insert_pfn_pmd_prot(vmf, pfn, vmf->vma->vm_page_prot, write);
+}
+vm_fault_t vmf_insert_pfn_pud_prot(struct vm_fault *vmf, pfn_t pfn,
+  pgprot_t pgprot, bool write);
+
+/**
+ * vmf_insert_pfn_pud - insert a pud size pfn
+ * @vmf: Structure describing the fault
+ * @pfn: pfn to insert
+ * @pgprot: page protection to use
+ * @write: whether it's a write fault
+ *
+ * Insert a pud size pfn. See vmf_insert_pfn() for additional info.
+ *
+ * Return: vm_fault_t value.
+ */
+static inline vm_fault_t vmf_insert_pfn_pud(struct vm_fault *vmf, pfn_t pfn,
+   bool write)
+{
+   return vmf_insert_pfn_pud_prot(vmf, pfn, vmf->vma->vm_page_prot, write);
+}
+
 enum transparent_hugepage_flag {
TRANSPARENT_HUGEPAGE_FLAG,
TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG,
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index f8d24fc3f4df..b2ec62cca3ae 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -811,11 +811,24 @@ static void insert_pfn_pmd(struct vm_area_struct *vma, 
unsigned long addr,
pte_free(mm, pgtable);
 }
 
-vm_fault_t vmf_insert_pfn_pmd(struct vm_fault *vmf, pfn_t pfn, bool write)
+/**
+ * vmf_insert_pfn_pmd_prot - insert a pmd size pfn
+ * @vmf: Structure describing the fault
+ * @pfn: pfn to insert
+ * @pgprot: page protection to use
+ * @write: whether it's a write fault
+ *
+ * Insert a pmd size pfn. See vmf_insert_pfn() for additional info and
+ * also consult the vmf_insert_mixed_prot() documentation when
+ * @pgprot != @vmf->vma->vm_page_prot.
+ *
+ * Return: vm_fault_t value.
+ */
+vm_fault_t vmf_insert_pfn_pmd_prot(struct vm_fault *vmf, pfn_t pfn,
+  pgprot_t pgprot, bool write)
 {
unsigned long addr = vmf->address & PMD_MASK;
struct vm_area_struct *vma = vmf->vma;
-   pgprot_t pgprot = vma->vm_page_prot;
pgtable_t pgtable = NULL;
 
/*
@@ -843,7 +856,7 @@ vm_fault_t vmf_insert_pfn_pmd(struct vm_fault *vmf, pfn_t 
pfn, bool write)
insert_pfn_pmd(vma, addr, vmf->pmd, pfn, pgprot, write, pgtable);
return VM_FAULT_NOPAGE;
 }
-EXPORT_SYMBOL_GPL(vmf_insert_pfn_pmd);
+EXPORT_SYMBOL_GPL(vmf_insert_pfn_pmd_prot);
 
 #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
 static pud_t maybe_pud_mkwrite(pud_t pud, struct vm_area_struct *vma)
@@ -889,11 +902,24 @@ static void insert_pfn_pud(struct vm_area_struct *vma, 
unsigned long addr,
spin_unlock(ptl);
 }
 
-vm_fault_t vmf_insert_pfn_pud(struct vm_fault *vmf, pfn_t pfn, bool write)
+/**
+ * vmf_insert_pfn_pud_prot - insert a pud size pfn
+ * @vmf: Structure describing the fault
+ * @pfn: pfn to insert
+ * @pgprot: page protection to use
+ * @write: whether it's a write fault
+ *
+ * Insert a pud size pfn. See vmf_insert_pfn() for additional info and
+ * also consult the vmf_insert_mixed_prot() documentation when
+ * @pgprot != @vmf->vma->vm_page_prot.
+ *
+ * Return: vm_fault_t value.
+ */
+vm_fault_t vmf_insert_pfn_pud_prot(struct vm_fault *vmf, pfn_t pfn,
+  pgprot_t pgprot, bool write)
 {
unsigned long addr = vmf->address & PUD_MASK;
struct vm_area_struct *vma = vmf->vma;
-   pgprot_t pgpro

[PATCH v4 0/9] Huge page-table entries for TTM

2020-02-20 Thread VMware
In order to reduce TLB misses and CPU usage, this patchset enables huge-
and giant page-table entries for TTM and TTM-enabled graphics drivers.

Patch 1 and 2 introduce a vma_is_special_huge() function to make the mm code
take the same path as DAX when splitting huge- and giant page table entries,
(which currently means zapping the page-table entry and relying on re-faulting).

Patch 3 makes the mm code split existing huge page-table entries
on huge_fault fallbacks. Typically on COW or on buffer-objects that want
write-notify. COW and write-notification is always done on the lowest
page-table level. See the patch log message for additional considerations.

Patch 4 introduces functions to allow the graphics drivers to manipulate
the caching- and encryption flags of huge page-table entries without ugly
hacks.

Patch 5 implements the huge_fault handler in TTM.
This enables huge page-table entries, provided that the kernel is configured
to support transhuge pages, either by default or using madvise().
However, they are unlikely to be inserted unless the kernel buffer object
pfns and user-space addresses align perfectly. There are various options
here, but since buffer objects that reside in system pages typically start
at huge page boundaries if they are backed by huge pages, we try to enforce
buffer object starting pfns and user-space addresses to be huge page-size
aligned if their size exceeds a huge page-size. If pud-size transhuge
("giant") pages are enabled by the arch, the same holds for those.

Patch 6 implements a specialized huge_fault handler for vmwgfx.
The vmwgfx driver may perform dirty-tracking and needs some special code
to handle that correctly.

Patch 7 implements a drm helper to align user-space addresses according
to the above scheme, if possible.

Patch 8 implements a TTM range manager for vmwgfx that does the same for
graphics IO memory. This may later be reused by other graphics drivers
if necessary.

Patch 9 finally hooks up the helpers of patch 7 and 8 to the vmwgfx driver.
A similar change is needed for graphics drivers that want a reasonable
likelihood of actually using huge page-table entries.

If a buffer object size is not huge-page or giant-page aligned,
its size will NOT be inflated by this patchset. This means that the buffer
object tail will use smaller size page-table entries and thus no memory
overhead occurs. Drivers that want to pay the memory overhead price need to
implement their own scheme to inflate buffer-object sizes.

PMD size huge page-table-entries have been tested with vmwgfx and found to
work well both with system memory backed and IO memory backed buffer objects.

PUD size giant page-table-entries have seen limited (fault and COW) testing
using a modified kernel (to support 1GB page allocations) and a fake vmwgfx
TTM memory type. The vmwgfx driver does otherwise not support 1GB-size IO
memory resources.

Comments and suggestions welcome.
Thomas

Changes since RFC:
* Check for buffer objects present in contiguous IO memory (Christian König)
* Rebased on the vmwgfx emulated coherent memory functionality. That rebase
  adds patch 5.
Changes since v1:
* Make the new TTM range manager vmwgfx-specific. (Christian König)
* Minor fixes for configs that don't support or only partially support
  transhuge pages.
Changes since v2:
* Minor coding style and doc fixes in patch 5/9 (Christian König)
* Patch 5/9 doesn't touch mm. Remove from the patch title.
Changes since v3:
* Added reviews and acks
* Implemented ugly but generic ttm_pgprot_is_wrprotecting() instead of arch
  specific code.

Cc: Andrew Morton 
Cc: Michal Hocko 
Cc: "Matthew Wilcox (Oracle)" 
Cc: "Kirill A. Shutemov" 
Cc: Ralph Campbell 
Cc: "Jérôme Glisse" 
Cc: "Christian König" 
Cc: Dan Williams 




[PATCH v4 5/9] drm/ttm, drm/vmwgfx: Support huge TTM pagefaults

2020-02-20 Thread VMware
From: Thomas Hellstrom 

Support huge (PMD-size and PUD-size) page-table entries by providing a
huge_fault() callback.
We still support private mappings and write-notify by splitting the huge
page-table entries on write-access.

Note that for huge page-faults to occur, either the kernel needs to be
compiled with trans-huge-pages always enabled, or the kernel needs to be
compiled with trans-huge-pages enabled using madvise, and the user-space
app needs to call madvise() to enable trans-huge pages on a per-mapping
basis.

Furthermore huge page-faults will not succeed unless buffer objects and
user-space addresses are aligned on huge page size boundaries.
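
For reference, a minimal user-space sketch of the madvise() opt-in
described above (fd, bo_size and bo_offset are placeholders for a real
buffer-object mmap handle):

	void *addr = mmap(NULL, bo_size, PROT_READ | PROT_WRITE,
			  MAP_SHARED, fd, bo_offset);
	if (addr != MAP_FAILED)
		madvise(addr, bo_size, MADV_HUGEPAGE);
	/* Subsequent faults in this mapping may now be PMD-size, provided
	 * the kernel, the address and the buffer object are all suitably
	 * aligned. */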

Cc: Andrew Morton 
Cc: Michal Hocko 
Cc: "Matthew Wilcox (Oracle)" 
Cc: "Kirill A. Shutemov" 
Cc: Ralph Campbell 
Cc: "Jérôme Glisse" 
Cc: "Christian König" 
Cc: Dan Williams 
Signed-off-by: Thomas Hellstrom 
Reviewed-by: Roland Scheidegger 
Reviewed-by: Christian König 
---
 drivers/gpu/drm/ttm/ttm_bo_vm.c| 161 -
 drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c |   2 +-
 include/drm/ttm/ttm_bo_api.h   |   3 +-
 3 files changed, 161 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
index 389128b8c4dd..0af14835504c 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -156,6 +156,89 @@ vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo,
 }
 EXPORT_SYMBOL(ttm_bo_vm_reserve);
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+/**
+ * ttm_bo_vm_insert_huge - Insert a pfn for PUD or PMD faults
+ * @vmf: Fault data
+ * @bo: The buffer object
+ * @page_offset: Page offset from bo start
+ * @fault_page_size: The size of the fault in pages.
+ * @pgprot: The page protections.
+ * Does additional checking whether it's possible to insert a PUD or PMD
+ * pfn and performs the insertion.
+ *
+ * Return: VM_FAULT_NOPAGE on successful insertion, VM_FAULT_FALLBACK if
+ * a huge fault was not possible, or on insertion error.
+ */
+static vm_fault_t ttm_bo_vm_insert_huge(struct vm_fault *vmf,
+   struct ttm_buffer_object *bo,
+   pgoff_t page_offset,
+   pgoff_t fault_page_size,
+   pgprot_t pgprot)
+{
+   pgoff_t i;
+   vm_fault_t ret;
+   unsigned long pfn;
+   pfn_t pfnt;
+   struct ttm_tt *ttm = bo->ttm;
+   bool write = vmf->flags & FAULT_FLAG_WRITE;
+
+   /* Fault should not cross bo boundary. */
+   page_offset &= ~(fault_page_size - 1);
+   if (page_offset + fault_page_size > bo->num_pages)
+   goto out_fallback;
+
+   if (bo->mem.bus.is_iomem)
+   pfn = ttm_bo_io_mem_pfn(bo, page_offset);
+   else
+   pfn = page_to_pfn(ttm->pages[page_offset]);
+
+   /* pfn must be fault_page_size aligned. */
+   if ((pfn & (fault_page_size - 1)) != 0)
+   goto out_fallback;
+
+   /* Check that memory is contiguous. */
+   if (!bo->mem.bus.is_iomem) {
+   for (i = 1; i < fault_page_size; ++i) {
+   if (page_to_pfn(ttm->pages[page_offset + i]) != pfn + i)
+   goto out_fallback;
+   }
+   } else if (bo->bdev->driver->io_mem_pfn) {
+   for (i = 1; i < fault_page_size; ++i) {
+   if (ttm_bo_io_mem_pfn(bo, page_offset + i) != pfn + i)
+   goto out_fallback;
+   }
+   }
+
+   pfnt = __pfn_to_pfn_t(pfn, PFN_DEV);
+   if (fault_page_size == (HPAGE_PMD_SIZE >> PAGE_SHIFT))
+   ret = vmf_insert_pfn_pmd_prot(vmf, pfnt, pgprot, write);
+#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
+   else if (fault_page_size == (HPAGE_PUD_SIZE >> PAGE_SHIFT))
+   ret = vmf_insert_pfn_pud_prot(vmf, pfnt, pgprot, write);
+#endif
+   else
+   WARN_ON_ONCE(ret = VM_FAULT_FALLBACK);
+
+   if (ret != VM_FAULT_NOPAGE)
+   goto out_fallback;
+
+   return VM_FAULT_NOPAGE;
+out_fallback:
+   count_vm_event(THP_FAULT_FALLBACK);
+   return VM_FAULT_FALLBACK;
+}
+#else
+static vm_fault_t ttm_bo_vm_insert_huge(struct vm_fault *vmf,
+   struct ttm_buffer_object *bo,
+   pgoff_t page_offset,
+   pgoff_t fault_page_size,
+   pgprot_t pgprot)
+{
+   return VM_FAULT_FALLBACK;
+}
+#endif
+
 /**
  * ttm_bo_vm_fault_reserved - TTM fault helper
  * @vmf: The struct vm_fault given as argument to the fault callback
@@ -163,6 +246,7 @@ EXPORT_SYMBOL(ttm_bo_vm_reserve);
  * @num_prefault: Maximum number of prefault pages. The caller may want to
 * specify this based on madvise settings and the size of the GPU object
  * backed by the memory.
+ * @fault_page_size: The size of the fault in pages.
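
For context, a sketch (not part of the patch) of how a driver's
huge_fault() callback might derive the fault_page_size argument, assuming
the vm_operations_struct::huge_fault signature that takes an
enum page_entry_size; the function name is illustrative:

static vm_fault_t my_bo_vm_huge_fault(struct vm_fault *vmf,
				      enum page_entry_size pe_size)
{
	pgoff_t fault_page_size;

	switch (pe_size) {
	case PE_SIZE_PMD:
		fault_page_size = HPAGE_PMD_SIZE >> PAGE_SHIFT;
		break;
#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
	case PE_SIZE_PUD:
		fault_page_size = HPAGE_PUD_SIZE >> PAGE_SHIFT;
		break;
#endif
	default:
		return VM_FAULT_FALLBACK;
	}

	/* A real implementation would reserve the bo and then pass
	 * fault_page_size on to ttm_bo_vm_fault_reserved(); elided here. */
	return VM_FAULT_FALLBACK;
}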

Re: [PATCH 5/5] drm/amdgpu: implement amdgpu_gem_prime_move_notify v2

2020-02-20 Thread VMware

On 2/20/20 7:04 PM, Daniel Vetter wrote:

On Thu, Feb 20, 2020 at 10:39:06AM +0100, Thomas Hellström (VMware) wrote:

On 2/19/20 7:42 AM, Thomas Hellström (VMware) wrote:

On 2/18/20 10:01 PM, Daniel Vetter wrote:

On Tue, Feb 18, 2020 at 9:17 PM Thomas Hellström (VMware)
 wrote:

On 2/17/20 6:55 PM, Daniel Vetter wrote:

On Mon, Feb 17, 2020 at 04:45:09PM +0100, Christian König wrote:

Implement the importer side of unpinned DMA-buf handling.

v2: update page tables immediately

Signed-off-by: Christian König 
---
    drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 66 +-
    drivers/gpu/drm/amd/amdgpu/amdgpu_object.c  |  6 ++
    2 files changed, 71 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
index 770baba621b3..48de7624d49c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -453,7 +453,71 @@ amdgpu_dma_buf_create_obj(struct drm_device *dev, struct dma_buf *dma_buf)
   return ERR_PTR(ret);
    }

+/**
+ * amdgpu_dma_buf_move_notify - &attach.move_notify implementation
+ *
+ * @attach: the DMA-buf attachment
+ *
+ * Invalidate the DMA-buf attachment, making sure that we re-create the
+ * mapping before the next use.
+ */
+static void
+amdgpu_dma_buf_move_notify(struct dma_buf_attachment *attach)
+{
+    struct drm_gem_object *obj = attach->importer_priv;
+    struct ww_acquire_ctx *ticket = dma_resv_locking_ctx(obj->resv);
+    struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+    struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+    struct ttm_operation_ctx ctx = { false, false };
+    struct ttm_placement placement = {};
+    struct amdgpu_vm_bo_base *bo_base;
+    int r;
+
+    if (bo->tbo.mem.mem_type == TTM_PL_SYSTEM)
+    return;
+
+    r = ttm_bo_validate(&bo->tbo, &placement, &ctx);
+    if (r) {
+    DRM_ERROR("Failed to invalidate DMA-buf import (%d)\n", r);
+    return;
+    }
+
+    for (bo_base = bo->vm_bo; bo_base; bo_base = bo_base->next) {
+    struct amdgpu_vm *vm = bo_base->vm;
+    struct dma_resv *resv = vm->root.base.bo->tbo.base.resv;
+
+    if (ticket) {

Yeah so this is kinda why I've been a total pain about the exact
semantics of the move_notify hook. I think we should flat-out require
that importers _always_ have a ticket attached when they call this, and
that they can cope with additional locks being taken (i.e. full EDEADLK
handling).

Simplest way to force that contract is to add a dummy 2nd ww_mutex lock
to the dma_resv object, which we then can take #ifdef
CONFIG_WW_MUTEX_SLOWPATH_DEBUG. Plus maybe a WARN_ON(!ticket).

Now the real disaster is how we handle deadlocks. Two issues:

- Ideally we'd keep any lock we've taken locked until the end, it helps
  avoid needless backoffs. I've played around a bit with that but not
  even PoC level, just an idea:

  https://cgit.freedesktop.org/~danvet/drm/commit/?id=b1799c5a0f02df9e1bb08d27be37331255ab7582

  Idea is essentially to track a list of objects we had to lock as part
  of the ttm_bo_validate of the main object.

- Second one is if we get an EDEADLK on one of these sublocks (like the
  one here). We need to pass that up the entire callchain, including a
  temporary reference (we have to drop locks to do the ww_mutex_lock_slow
  call), and need a custom callback to drop that temporary reference
  (since that's all driver-specific, might even be an internal ww_mutex
  and not anything remotely looking like a normal dma_buf). This probably
  needs the exec util helpers from ttm, but at the dma_resv level, so
  that we can do something like this:

struct dma_resv_ticket {
    struct ww_acquire_ctx base;

    /* can be set by anyone (including other drivers) that got hold of
     * this ticket and had to acquire some new lock. This lock might
     * protect anything, including driver-internal stuff, and isn't
     * required to be a dma_buf or even just a dma_resv. */
    struct ww_mutex *contended_lock;

    /* callback which the driver (which might be a dma-buf exporter
     * and not matching the driver that started this locking ticket)
     * sets together with @contended_lock, for the main driver to drop
     * when it calls dma_resv_unlock on the contended_lock. */
    void (*drop_ref)(struct ww_mutex *contended_lock);
};

This is all supremely nasty (also ttm_bo_validate would need to be
improved to handle these sublocks and random new objects that could
force a ww_mutex_lock_slow).


Just a short comment on this:

Neither the currently used wait-die nor the wound-wait algorithm
*strictly* requires a slow lock on the contended lock. For wait-die it's
just very convenient since it makes us sleep instead of spinning with
-EDEADLK on the contended lock.
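
For reference, a minimal sketch of the wait-die back-off pattern being
discussed, using the stock ww_mutex API (the two-lock scenario and all
names are illustrative, not taken from any of the patches):

#include <linux/ww_mutex.h>

static DEFINE_WW_CLASS(demo_ww_class);

static void demo_lock_both(struct ww_mutex *lock_a, struct ww_mutex *lock_b)
{
	struct ww_mutex *contended = NULL;
	struct ww_acquire_ctx ctx;

	ww_acquire_init(&ctx, &demo_ww_class);
retry:
	if (contended) {
		/* Back-off taken: sleep until the contended lock is free,
		 * then acquire it first on this pass. */
		ww_mutex_lock_slow(contended, &ctx);
	}
	if (contended != lock_a &&
	    ww_mutex_lock(lock_a, &ctx) == -EDEADLK) {
		if (contended)
			ww_mutex_unlock(contended);
		contended = lock_a;
		goto retry;
	}
	if (contended != lock_b &&
	    ww_mutex_lock(lock_b, &ctx) == -EDEADLK) {
		ww_mutex_unlock(lock_a);
		contended = lock_b;
		goto retry;
	}
	ww_acquire_done(&ctx);

	/* ... critical section ... */

	ww_mutex_unlock(lock_a);
	ww_mutex_unlock(lock_b);
	ww_acquire_fini(&ctx);
}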

Re: [PATCH 5/5] drm/amdgpu: implement amdgpu_gem_prime_move_notify v2

2020-02-20 Thread VMware

On 2/20/20 9:08 PM, Daniel Vetter wrote:

[...]

Re: [PATCH 3/8] drm/vmwgfx: don't use ttm bo->offset

2020-02-21 Thread VMware

On 2/19/20 2:53 PM, Nirmoy Das wrote:

Calculate GPU offset within vmwgfx driver itself without depending on
bo->offset

Signed-off-by: Nirmoy Das 
Acked-by: Christian König 


Tested-by: Thomas Hellstrom 
Acked-by: Thomas Hellstrom 




Re: [PATCH v3 0/8] do not store GPU address in TTM

2020-02-21 Thread VMware

Hi,

On 2/19/20 2:53 PM, Nirmoy Das wrote:

With this patch series I am trying to remove GPU address dependency in
TTM and moving GPU address calculation to individual drm drivers.


For future reference, could you please add a motivation for the series?
For example: cleanup, needed because..., simplifies..., etc.

Thanks,

Thomas




Re: [PATCH 5/5] drm/amdgpu: implement amdgpu_gem_prime_move_notify v2

2020-02-21 Thread VMware

On 2/21/20 6:12 PM, Daniel Vetter wrote:

[...]
