Support the kernel's write-only mapping interface. Also add an interface to
ease userspace cache flushing on non-LLC-coherent architectures.
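A minimal sketch of the intended calling sequence (the upload_wo() helper,
its arguments, and the data payload are hypothetical and not part of this
patch; it only uses the entry points added below plus the existing
drm_intel_bo_unmap()):

    #include <string.h>
    #include "intel_bufmgr.h"

    /* Hypothetical helper: upload "size" bytes into "bo" without ever
     * reading it back through the CPU map. */
    static int upload_wo(drm_intel_bo *bo, const void *data, size_t size)
    {
            int ret;

            /* Write-only CPU map; a repeated write-only map of the same
             * bo can skip the set_domain ioctl entirely. */
            ret = drm_intel_gem_bo_map_wo(bo);
            if (ret)
                    return ret;

            memcpy(bo->virtual, data, size);

            /* On non-LLC (pre-gen6) parts the dirtied cachelines must be
             * flushed before the GPU reads the buffer; on gen6+ this is
             * a no-op. */
            ret = drm_intel_gem_bo_flush(bo, 0, size);

            drm_intel_bo_unmap(bo);
            return ret;
    }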
Signed-off-by: Ben Widawsky <b...@bwidawsk.net>
---
 include/drm/i915_drm.h   |    5 ++
 intel/intel_bufmgr.h     |    9 ++++
 intel/intel_bufmgr_gem.c |  105 ++++++++++++++++++++++++++++++++++++++++++++--
 3 files changed, 115 insertions(+), 4 deletions(-)

diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
index adc2392..576d419 100644
--- a/include/drm/i915_drm.h
+++ b/include/drm/i915_drm.h
@@ -189,6 +189,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_I915_OVERLAY_PUT_IMAGE	0x27
 #define DRM_I915_OVERLAY_ATTRS	0x28
 #define DRM_I915_GEM_EXECBUFFER2	0x29
+#define DRM_I915_GEM_FLUSH	0x2a
 
 #define DRM_IOCTL_I915_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
 #define DRM_IOCTL_I915_FLUSH		DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
@@ -230,6 +231,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_IOCTL_I915_GEM_MADVISE	DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MADVISE, struct drm_i915_gem_madvise)
 #define DRM_IOCTL_I915_OVERLAY_PUT_IMAGE	DRM_IOW(DRM_COMMAND_BASE + DRM_IOCTL_I915_OVERLAY_ATTRS, struct drm_intel_overlay_put_image)
 #define DRM_IOCTL_I915_OVERLAY_ATTRS	DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_OVERLAY_ATTRS, struct drm_intel_overlay_attrs)
+#define DRM_IOCTL_I915_GEM_FLUSH	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_FLUSH, struct drm_i915_gem_flush)
 
 /* Allow drivers to submit batchbuffers directly to hardware, relying
  * on the security mechanisms provided by hardware.
@@ -835,4 +837,7 @@ struct drm_intel_overlay_attrs {
 	__u32 gamma5;
 };
 
+struct drm_i915_gem_flush {
+	__u32 bo_handle;
+};
 #endif				/* _I915_DRM_H_ */
diff --git a/intel/intel_bufmgr.h b/intel/intel_bufmgr.h
index 889ef46..c6fd907 100644
--- a/intel/intel_bufmgr.h
+++ b/intel/intel_bufmgr.h
@@ -81,6 +81,9 @@ struct _drm_intel_bo {
 	 * MM-specific handle for accessing object
 	 */
 	int handle;
+
+	/** Buffer is only writable from the CPU */
+	int write_only;
 };
 
 #define BO_ALLOC_FOR_RENDER (1<<0)
@@ -145,6 +148,12 @@ drm_intel_bo *drm_intel_bo_gem_create_from_name(drm_intel_bufmgr *bufmgr,
 						unsigned int handle);
 void drm_intel_bufmgr_gem_enable_reuse(drm_intel_bufmgr *bufmgr);
 void drm_intel_bufmgr_gem_enable_fenced_relocs(drm_intel_bufmgr *bufmgr);
+int drm_intel_gem_bo_flush(drm_intel_bo *bo, uint32_t offset, size_t size);
+#define drm_intel_gem_bo_flush_obj(bo) drm_intel_gem_bo_flush(bo, 0, (bo)->size)
+int drm_intel_gem_bo_map_wo(drm_intel_bo *bo);
+int drm_intel_gem_bo_map_gtt_wo(drm_intel_bo *bo,
+				uint32_t prefault_offset,
+				int prefault_pages);
 int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo);
 int drm_intel_gem_bo_unmap_gtt(drm_intel_bo *bo);
 void drm_intel_gem_bo_start_gtt_access(drm_intel_bo *bo, int write_enable);
diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c
index 4f4de92..b9e8936 100644
--- a/intel/intel_bufmgr_gem.c
+++ b/intel/intel_bufmgr_gem.c
@@ -998,7 +998,7 @@ static void drm_intel_gem_bo_unreference(drm_intel_bo *bo)
 	}
 }
 
-static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable)
+static int do_bo_map(drm_intel_bo *bo, int read_enable, int write_enable)
 {
 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
@@ -1036,6 +1036,9 @@ static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable)
 	    bo_gem->mem_virtual);
 	bo->virtual = bo_gem->mem_virtual;
 
+	/* A repeated write-only map can skip the set_domain ioctl */
+	if (read_enable == 0 && bo->write_only)
+		goto map_done;
+
 	set_domain.handle = bo_gem->gem_handle;
 	set_domain.read_domains = I915_GEM_DOMAIN_CPU;
 	if (write_enable)
@@ -1050,13 +1053,73 @@ static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable)
 		    __FILE__, __LINE__, bo_gem->gem_handle,
 		    strerror(errno));
 	}
 
+	if (read_enable == 0)
+		bo->write_only = 1;
+
+map_done:
 	pthread_mutex_unlock(&bufmgr_gem->lock);
 
 	return 0;
 }
 
-int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo)
+static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable)
+{
+	return do_bo_map(bo, 1, write_enable);
+}
+
+static int
+get_cacheline_size(void)
+{
+	int ret = sysconf(_SC_LEVEL1_DCACHE_LINESIZE);
+	if (ret == -1)
+		ret = 64;
+
+	return ret;
+}
+
+int drm_intel_gem_bo_flush(drm_intel_bo *bo, uint32_t offset, size_t size)
+{
+	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
+	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
+	const int cacheline_size = get_cacheline_size();
+	const size_t end = offset + size;
+	int num_cachelines;
+	struct drm_i915_gem_flush args;
+	int i;
+
+	/* LLC-coherent platforms need no explicit flush */
+	if (bufmgr_gem->gen >= 6)
+		return 0;
+
+	if (bo_gem->mem_virtual == NULL)
+		return -1;
+
+	if (end > bo->size)
+		return -1;
+
+	/* round offset down to a cacheline boundary */
+	offset &= ~(cacheline_size - 1);
+
+	num_cachelines = (end - offset + cacheline_size - 1) / cacheline_size;
+	if (num_cachelines <= 0)
+		return 0;
+
+	for (i = 0; i < num_cachelines; i++) {
+		void *addr = (uint8_t *)bo_gem->mem_virtual + offset +
+			     (i * cacheline_size);
+		asm volatile("clflush (%0)" : : "r" (addr) : "memory");
+	}
+	/* clflush is only ordered by mfence */
+	asm volatile("mfence" : : : "memory");
+
+	args.bo_handle = bo_gem->gem_handle;
+	return drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_FLUSH, &args);
+}
+
+/**
+ * Any buffer mapped write-only must use the flush interface before the
+ * GPU reads from it.
+ */
+int drm_intel_gem_bo_map_wo(drm_intel_bo *bo)
+{
+	return do_bo_map(bo, 0, 1);
+}
+
+static int do_bo_map_gtt(drm_intel_bo *bo, int read, int write)
 {
 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
@@ -1112,8 +1175,8 @@ int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo)
 
 	/* Now move it to the GTT domain so that the CPU caches are flushed */
 	set_domain.handle = bo_gem->gem_handle;
-	set_domain.read_domains = I915_GEM_DOMAIN_GTT;
-	set_domain.write_domain = I915_GEM_DOMAIN_GTT;
+	set_domain.read_domains = read;
+	set_domain.write_domain = write;
 	ret = drmIoctl(bufmgr_gem->fd,
 		       DRM_IOCTL_I915_GEM_SET_DOMAIN,
 		       &set_domain);
@@ -1128,6 +1191,40 @@
 	return 0;
 }
 
+int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo)
+{
+	return do_bo_map_gtt(bo, I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
+}
+
+int drm_intel_gem_bo_map_gtt_wo(drm_intel_bo *bo,
+				uint32_t prefault_offset,
+				int prefault_pages)
+{
+	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
+	volatile uint8_t *faultin_start;
+	int ret, i;
+
+	/* round the prefault offset down to a page boundary */
+	prefault_offset &= ~(4096 - 1);
+
+	if (prefault_pages > 0 &&
+	    prefault_offset + ((size_t)prefault_pages * 4096) > bo->size)
+		return -EINVAL;
+
+	ret = do_bo_map_gtt(bo, 0, I915_GEM_DOMAIN_GTT);
+	if (ret)
+		return ret;
+
+	if (prefault_pages <= 0)
+		return 0;
+
+	/* touch one byte per page to fault the GTT mapping in up front */
+	faultin_start = (volatile uint8_t *)bo_gem->gtt_virtual +
+			prefault_offset;
+	for (i = 0; i < prefault_pages; i++)
+		(void)faultin_start[i * 4096];
+
+	return ret;
+}
+
 int drm_intel_gem_bo_unmap_gtt(drm_intel_bo *bo)
 {
 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
-- 
1.7.6.1
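As a usage note for reviewers, a sketch of driving the prefaulting GTT
write-only map; the upload_gtt_wo() helper and its arguments are hypothetical
and only exercise the entry points added above:

    #include <string.h>
    #include "intel_bufmgr.h"

    /* Hypothetical helper: stream "size" bytes into "bo" through a
     * write-only GTT map, prefaulting the destination pages so the
     * copy loop itself takes no page faults. */
    static int upload_gtt_wo(drm_intel_bo *bo, const void *data, size_t size)
    {
            int pages = (size + 4095) / 4096;
            int ret;

            /* Write-only GTT map; no read domain is requested since the
             * CPU will only ever write through this mapping. */
            ret = drm_intel_gem_bo_map_gtt_wo(bo, 0, pages);
            if (ret)
                    return ret;

            memcpy(bo->virtual, data, size);  /* write-combined copy */
            return drm_intel_gem_bo_unmap_gtt(bo);
    }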