NEWS | 80 ++++ configure.ac | 9 src/Makefile.am | 63 +-- src/intel_device.c | 279 ++++++++++++++ src/intel_display.c | 2 src/intel_driver.c | 2 src/intel_driver.h | 8 src/intel_module.c | 75 +-- src/sna/Makefile.am | 2 src/sna/blt.c | 604 ++++++++++++++++++++++++++---- src/sna/brw/brw_eu.h | 2 src/sna/brw/brw_eu_debug.c | 95 ---- src/sna/compiler.h | 2 src/sna/gen2_render.c | 14 src/sna/gen3_render.c | 29 + src/sna/gen4_render.c | 25 - src/sna/gen5_render.c | 63 +-- src/sna/gen6_render.c | 31 - src/sna/gen7_render.c | 79 ++-- src/sna/kgem.c | 226 +++++++++-- src/sna/kgem.h | 98 +++-- src/sna/sna.h | 9 src/sna/sna_accel.c | 842 ++++++++++++++++++++++++++++++++++--------- src/sna/sna_blt.c | 182 +++++---- src/sna/sna_composite.c | 2 src/sna/sna_damage.c | 54 ++ src/sna/sna_display.c | 747 ++++++++++++++++++++++++++------------ src/sna/sna_dri.c | 170 ++++---- src/sna/sna_driver.c | 130 ------ src/sna/sna_gradient.c | 13 src/sna/sna_io.c | 150 ++++--- src/sna/sna_render.c | 78 ++- src/sna/sna_render.h | 3 src/sna/sna_tiling.c | 71 ++- src/sna/sna_trapezoids.c | 7 src/sna/sna_video.c | 44 ++ src/sna/sna_video.h | 3 src/sna/sna_video_overlay.c | 10 src/sna/sna_video_sprite.c | 5 src/sna/sna_video_textured.c | 20 - test/Makefile.am | 2 41 files changed, 3105 insertions(+), 1225 deletions(-)
New commits: commit 7d9163983ea2e960c0a7b55266fcc532b9c6e382 Author: Chris Wilson <ch...@chris-wilson.co.uk> Date: Sun Jun 30 15:26:57 2013 +0100 2.21.11 release diff --git a/NEWS b/NEWS index 0455c87..576ef86 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,55 @@ +Release 2.21.11 (2013-06-30) +============================ +An eventful week. What started with a regression with some builds of +firefox on some machines led ultimately to the discovery of an older +kernel bug. Aside from the work to fix the image bug and a few other +older bugs that were reported and resolved this week, there is also a +(hopefully) subtle change to the initial configuration of displays. In +the absence of user overrides in xorg.conf, the DDX will try to preserve +the same display configuration as used by the kernel, which hopefully +will be the same configuration as set up by the BIOS. The result should +be a boot sequence that does not resize at all (aka fastboot) - until +the display manager takes over and loads a completely different +configuration! 
+ + * Add reference counting of drmMaster for ZaphodHeads + https://bugs.freedesktop.org/show_bug.cgi?id=66041 + + * Add a GPU flush before changing blend modes on Ironlake + https://bugs.freedesktop.org/show_bug.cgi?id=51422 + + * Fix occasional missing images for inplace uploads + [regression from 2.21.10] + https://bugs.freedesktop.org/show_bug.cgi?id=66059 + + * Add missing utility files to the tarball and remove a few unused ones + + * Initialise PolyPoint operand state before calling miWideDash + https://bugs.freedesktop.org/show_bug.cgi?id=66104 + + * Fix redirection handling for rendering into large surfaces + https://bugs.freedesktop.org/show_bug.cgi?id=66168 + https://bugs.freedesktop.org/show_bug.cgi?id=66249 + + * Fix compilation of UXA with xorg-xserver < 1.10 + [regression from 2.20.0] + + * Fix consideration of gradients for deciding when to migrate render + operations + [performance regression from 2.21.10, the bug itself is older] + https://bugs.freedesktop.org/show_bug.cgi?id=66297 + +Also fixed this week was: + +commit 22fd5ca947b58901927d100d2b1aa0f1672b3435 +Author: Chris Wilson <ch...@chris-wilson.co.uk> +Date: Fri Jun 28 16:54:08 2013 +0100 + + drm/i915: Only clear write-domains after a successful wait-seqno + +which affects kernels 3.7 - 3.10, coming to a stable kernel near you soon. 
+ + Release 2.21.10 (2013-06-22) ============================ Fixes missing support for Xv (with the textured video adaptor) on diff --git a/configure.ac b/configure.ac index 7425fda..79018d5 100644 --- a/configure.ac +++ b/configure.ac @@ -23,7 +23,7 @@ # Initialize Autoconf AC_PREREQ([2.60]) AC_INIT([xf86-video-intel], - [2.21.10], + [2.21.11], [https://bugs.freedesktop.org/enter_bug.cgi?product=xorg], [xf86-video-intel]) AC_CONFIG_SRCDIR([Makefile.am]) commit 5005bd2d52ab64cbeae099d512d0b65be6c4abaa Author: Chris Wilson <ch...@chris-wilson.co.uk> Date: Sun Jun 30 15:02:19 2013 +0100 intel: Fix failure code for reporting !drmCheckModesetingSupported The new function returns the fd, not a Bool, so the error code must now be -1. Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> diff --git a/src/intel_device.c b/src/intel_device.c index cb48c34..da4d21e 100644 --- a/src/intel_device.c +++ b/src/intel_device.c @@ -98,7 +98,7 @@ static int __intel_open_device(const struct pci_device *pci, char **path) if (xf86LoadKernelModule("i915")) ret = drmCheckModesettingSupported(id); if (ret) - return FALSE; + return -1; /* Be nice to the user and load fbcon too */ (void)xf86LoadKernelModule("fbcon"); } commit f8738d7b4cc1c624d4390ef9ce7426ba457d7dd3 Author: Jonathan Gray <j...@jsg.id.au> Date: Sun Jun 30 19:37:45 2013 +1000 intel: replace direct ioctl use with drm{Set, Drop}Master Use drmSetMaster/drmDropMaster instead of calling the ioctls directly. Fixes compilation on OpenBSD where these ioctls aren't defined. 
Signed-off-by: Jonathan Gray <j...@jsg.id.au> diff --git a/src/intel_device.c b/src/intel_device.c index 5c36935..cb48c34 100644 --- a/src/intel_device.c +++ b/src/intel_device.c @@ -222,7 +222,7 @@ int intel_get_master(ScrnInfoPtr scrn) int retry = 2000; do { - ret = ioctl(dev->fd, DRM_IOCTL_SET_MASTER); + ret = drmSetMaster(dev->fd); if (ret == 0) break; usleep(1000); @@ -242,8 +242,8 @@ int intel_put_master(ScrnInfoPtr scrn) ret = 0; assert(dev->master_count); if (--dev->master_count == 0) { - assert(ioctl(dev->fd, DRM_IOCTL_SET_MASTER) == 0); - ret = ioctl(dev->fd, DRM_IOCTL_DROP_MASTER); + assert(drmSetMaster(dev->fd) == 0); + ret = drmDropMaster(dev->fd); } return ret; commit 40301e6d03f6e8d2d2d01e6bb9f1754a7e543a08 Author: Chris Wilson <ch...@chris-wilson.co.uk> Date: Sun Jun 30 11:12:34 2013 +0100 sna: Store the path used to open the device and pass to DRI Avoid having to search the device tree once again in order to simply recover the path we used to open the device. Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> diff --git a/src/intel_device.c b/src/intel_device.c index 5c49db0..5c36935 100644 --- a/src/intel_device.c +++ b/src/intel_device.c @@ -42,6 +42,7 @@ #include "intel_driver.h" struct intel_device { + char *path; int fd; int open_count; int master_count; @@ -80,11 +81,11 @@ static int fd_set_cloexec(int fd) return fd; } -static int __intel_open_device(const struct pci_device *pci, const char *path) +static int __intel_open_device(const struct pci_device *pci, char **path) { int fd; - if (path == NULL) { + if (*path == NULL) { char id[20]; int ret; @@ -103,14 +104,21 @@ static int __intel_open_device(const struct pci_device *pci, const char *path) } fd = drmOpen(NULL, id); + if (fd != -1) { + *path = drmGetDeviceNameFromFd(fd); + if (*path == NULL) { + close(fd); + fd = -1; + } + } } else { #ifdef O_CLOEXEC - fd = open(path, O_RDWR | O_CLOEXEC); + fd = open(*path, O_RDWR | O_CLOEXEC); #else fd = -1; #endif if (fd == -1) - fd = 
fd_set_cloexec(open(path, O_RDWR)); + fd = fd_set_cloexec(open(*path, O_RDWR)); } return fd; @@ -121,6 +129,7 @@ int intel_open_device(int entity_num, const char *path) { struct intel_device *dev; + char *local_path; int fd; if (intel_device_key == -1) @@ -132,16 +141,20 @@ int intel_open_device(int entity_num, if (dev) return dev->fd; - fd = __intel_open_device(pci, path); + local_path = path ? strdup(path) : NULL; + + fd = __intel_open_device(pci, &local_path); if (fd == -1) return -1; dev = malloc(sizeof(*dev)); if (dev == NULL) { + free(local_path); close(fd); return -1; } + dev->path = local_path; dev->fd = fd; dev->open_count = 0; dev->master_count = 0; @@ -190,6 +203,13 @@ int intel_get_device(ScrnInfoPtr scrn) return dev->fd; } +const char *intel_get_device_name(ScrnInfoPtr scrn) +{ + struct intel_device *dev = intel_device(scrn); + assert(dev && dev->path); + return dev->path; +} + int intel_get_master(ScrnInfoPtr scrn) { struct intel_device *dev = intel_device(scrn); @@ -236,6 +256,7 @@ void __intel_uxa_release_device(ScrnInfoPtr scrn) intel_set_device(scrn, NULL); drmClose(dev->fd); + free(dev->path); free(dev); } } @@ -253,5 +274,6 @@ void intel_put_device(ScrnInfoPtr scrn) intel_set_device(scrn, NULL); drmClose(dev->fd); + free(dev->path); free(dev); } diff --git a/src/intel_driver.h b/src/intel_driver.h index ed58444..22b623f 100644 --- a/src/intel_driver.h +++ b/src/intel_driver.h @@ -313,6 +313,7 @@ void intel_detect_chipset(ScrnInfoPtr scrn, int intel_open_device(int entity_num, const struct pci_device *pci, const char *path); int intel_get_device(ScrnInfoPtr scrn); +const char *intel_get_device_name(ScrnInfoPtr scrn); int intel_get_master(ScrnInfoPtr scrn); int intel_put_master(ScrnInfoPtr scrn); void intel_put_device(ScrnInfoPtr scrn); diff --git a/src/sna/sna.h b/src/sna/sna.h index f720c64..7fe7359 100644 --- a/src/sna/sna.h +++ b/src/sna/sna.h @@ -293,7 +293,6 @@ struct sna { bool dri_available; bool dri_open; - char *deviceName; /* Broken-out 
options. */ OptionInfoPtr Options; diff --git a/src/sna/sna_dri.c b/src/sna/sna_dri.c index e610d52..ca5f088 100644 --- a/src/sna/sna_dri.c +++ b/src/sna/sna_dri.c @@ -2445,11 +2445,10 @@ bool sna_dri_open(struct sna *sna, ScreenPtr screen) return false; } - sna->deviceName = drmGetDeviceNameFromFd(sna->kgem.fd); memset(&info, '\0', sizeof(info)); info.fd = sna->kgem.fd; info.driverName = dri_driver_name(sna); - info.deviceName = sna->deviceName; + info.deviceName = intel_get_device_name(sna->scrn); DBG(("%s: loading dri driver '%s' [gen=%d] for device '%s'\n", __FUNCTION__, info.driverName, sna->kgem.gen, info.deviceName)); @@ -2487,5 +2486,4 @@ void sna_dri_close(struct sna *sna, ScreenPtr screen) { DBG(("%s()\n", __FUNCTION__)); DRI2CloseScreen(screen); - drmFree(sna->deviceName); } commit 17da58f904e75d434aaf71e297e15d41153ba954 Author: Chris Wilson <ch...@chris-wilson.co.uk> Date: Sun Jun 30 11:01:49 2013 +0100 sna: Replace conflicting drmDropMaster Calling drmDropMaster twice along the CloseScreen path is not a good idea. 
Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> diff --git a/src/sna/sna_driver.c b/src/sna/sna_driver.c index 3428d5b..7092edc 100644 --- a/src/sna/sna_driver.c +++ b/src/sna/sna_driver.c @@ -734,6 +734,8 @@ static Bool sna_early_close_screen(CLOSE_SCREEN_ARGS_DECL) DBG(("%s\n", __FUNCTION__)); + /* XXX Note that we will leak kernel resources if !vtSema */ + xf86_hide_cursors(scrn); sna_uevent_fini(scrn); @@ -749,8 +751,10 @@ static Bool sna_early_close_screen(CLOSE_SCREEN_ARGS_DECL) sna->front = NULL; } - drmDropMaster(sna->kgem.fd); - scrn->vtSema = FALSE; + if (scrn->vtSema) { + intel_put_master(scrn); + scrn->vtSema = FALSE; + } xf86_cursors_fini(screen); @@ -773,7 +777,6 @@ static Bool sna_late_close_screen(CLOSE_SCREEN_ARGS_DECL) free(depths); free(screen->visuals); - intel_put_master(xf86ScreenToScrn(screen)); return TRUE; } commit 3a787da7e888da7e9943be94bd1cb177fe1495ab Author: Chris Wilson <ch...@chris-wilson.co.uk> Date: Sat Jun 29 21:57:20 2013 +0100 sna: Allow tiled memcpy on i386 With the split into per-swizzle functions, and with the forced optimisation levels, it appears that i386 doesn't suffer so badly and the tiled memcpy are a viable method. Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> diff --git a/src/sna/kgem.c b/src/sna/kgem.c index 5c029ad..aadc5f2 100644 --- a/src/sna/kgem.c +++ b/src/sna/kgem.c @@ -985,15 +985,6 @@ static void kgem_init_swizzling(struct kgem *kgem) { struct drm_i915_gem_get_tiling tiling; -#ifndef __x86_64__ - /* Between a register starved compiler emitting attrocious code - * and the extra overhead in the kernel for managing the tight - * 32-bit address space, unless we have a 64-bit system, - * using memcpy_to_tiled_x() is extremely slow. 
- */ - return; -#endif - if (kgem->gen < 050) /* bit17 swizzling :( */ return; commit 1d9941a7c003587c0bd732fb8b21fee5cefa6f87 Author: Chris Wilson <ch...@chris-wilson.co.uk> Date: Sat Jun 29 21:56:13 2013 +0100 sna: Add the Ofast option to the critical memcpy routines Always enable gcc to fully optimize the core memcpy routines (provided that optimisations are not entirely disabled, for instance for debugging). Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> diff --git a/src/sna/compiler.h b/src/sna/compiler.h index 75e691a..641b490 100644 --- a/src/sna/compiler.h +++ b/src/sna/compiler.h @@ -66,7 +66,7 @@ #endif #if HAS_GCC(4, 5) && defined(__OPTIMIZE__) -#define fast_memcpy __attribute__((target("inline-all-stringops"))) +#define fast_memcpy __attribute__((optimize("Ofast"))) __attribute__((target("inline-all-stringops"))) #else #define fast_memcpy #endif commit 84c190db33142f3c1ec347ec0bf87f77ce132d36 Author: Chris Wilson <ch...@chris-wilson.co.uk> Date: Sat Jun 29 19:06:40 2013 +0100 sna: Fix get_image_inplace to use the pixmap offset The inplace routine assumed that the region to be read was already in pixmap coordinates. Making it so makes the code easier, so do it. 
Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c index 2ed5007..80bc198 100644 --- a/src/sna/sna_accel.c +++ b/src/sna/sna_accel.c @@ -14279,12 +14279,11 @@ static int sna_create_gc(GCPtr gc) } static bool -sna_get_image_blt(DrawablePtr drawable, +sna_get_image_blt(PixmapPtr pixmap, RegionPtr region, char *dst, unsigned flags) { - PixmapPtr pixmap = get_drawable_pixmap(drawable); struct sna_pixmap *priv = sna_pixmap(pixmap); struct sna *sna = to_sna_from_pixmap(pixmap); struct kgem_bo *dst_bo; @@ -14343,21 +14342,17 @@ sna_get_image_blt(DrawablePtr drawable, DBG(("%s: download through a temporary map\n", __FUNCTION__)); pitch = PixmapBytePad(region->extents.x2 - region->extents.x1, - drawable->depth); + pixmap->drawable.depth); dst_bo = kgem_create_map(&sna->kgem, dst, pitch * (region->extents.y2 - region->extents.y1), false); if (dst_bo) { - int16_t dx, dy; - dst_bo->flush = true; dst_bo->pitch = pitch; kgem_bo_mark_unreusable(dst_bo); - get_drawable_deltas(drawable, pixmap, &dx, &dy); - ok = sna->render.copy_boxes(sna, GXcopy, - pixmap, priv->gpu_bo, dx, dy, + pixmap, priv->gpu_bo, 0, 0, pixmap, dst_bo, -region->extents.x1, -region->extents.y1, @@ -14373,12 +14368,11 @@ sna_get_image_blt(DrawablePtr drawable, } static bool -sna_get_image_inplace(DrawablePtr drawable, +sna_get_image_inplace(PixmapPtr pixmap, RegionPtr region, char *dst, unsigned flags) { - PixmapPtr pixmap = get_drawable_pixmap(drawable); struct sna_pixmap *priv = sna_pixmap(pixmap); struct sna *sna = to_sna_from_pixmap(pixmap); char *src; @@ -14411,14 +14405,13 @@ sna_get_image_inplace(DrawablePtr drawable, kgem_bo_sync__cpu_full(&sna->kgem, priv->gpu_bo, FORCE_FULL_SYNC); - if (priv->gpu_bo->tiling) { DBG(("%s: download through a tiled CPU map\n", __FUNCTION__)); memcpy_from_tiled_x(&sna->kgem, src, dst, pixmap->drawable.bitsPerPixel, priv->gpu_bo->pitch, PixmapBytePad(region->extents.x2 - region->extents.x1, - drawable->depth), + 
pixmap->drawable.depth), region->extents.x1, region->extents.y1, 0, 0, region->extents.x2 - region->extents.x1, @@ -14429,7 +14422,7 @@ sna_get_image_inplace(DrawablePtr drawable, pixmap->drawable.bitsPerPixel, priv->gpu_bo->pitch, PixmapBytePad(region->extents.x2 - region->extents.x1, - drawable->depth), + pixmap->drawable.depth), region->extents.x1, region->extents.y1, 0, 0, region->extents.x2 - region->extents.x1, @@ -14447,7 +14440,6 @@ sna_get_image(DrawablePtr drawable, { RegionRec region; unsigned int flags; - bool can_blt; if (!fbDrawableEnabled(drawable)) return; @@ -14457,50 +14449,55 @@ sna_get_image(DrawablePtr drawable, (long)get_drawable_pixmap(drawable)->drawable.serialNumber, x, y, w, h, format, mask, drawable->depth)); - region.extents.x1 = x + drawable->x; - region.extents.y1 = y + drawable->y; - region.extents.x2 = region.extents.x1 + w; - region.extents.y2 = region.extents.y1 + h; - region.data = NULL; - - can_blt = (ACCEL_GET_IMAGE && - !FORCE_FALLBACK && - format == ZPixmap && - drawable->bitsPerPixel >= 8 && - PM_IS_SOLID(drawable, mask)); - flags = MOVE_READ; if ((w | h) == 1) flags |= MOVE_INPLACE_HINT; if (w == drawable->width) flags |= MOVE_WHOLE_HINT; - if (can_blt && sna_get_image_blt(drawable, &region, dst, flags)) - return; + if (ACCEL_GET_IMAGE && + !FORCE_FALLBACK && + format == ZPixmap && + drawable->bitsPerPixel >= 8 && + PM_IS_SOLID(drawable, mask)) { + PixmapPtr pixmap = get_drawable_pixmap(drawable); + int16_t dx, dy; - if (can_blt && sna_get_image_inplace(drawable, &region, dst, flags)) - return; + get_drawable_deltas(drawable, pixmap, &dx, &dy); + region.extents.x1 = x + drawable->x + dx; + region.extents.y1 = y + drawable->y + dy; + region.extents.x2 = region.extents.x1 + w; + region.extents.y2 = region.extents.y1 + h; + region.data = NULL; - if (!sna_drawable_move_region_to_cpu(drawable, &region, flags)) - return; + if (sna_get_image_blt(pixmap, &region, dst, flags)) + return; - if (can_blt) { - PixmapPtr pixmap = 
get_drawable_pixmap(drawable); - int16_t dx, dy; + if (sna_get_image_inplace(pixmap, &region, dst, flags)) + return; + + if (!sna_drawable_move_region_to_cpu(&pixmap->drawable, + &region, flags)) + return; DBG(("%s: copy box (%d, %d), (%d, %d)\n", __FUNCTION__, region.extents.x1, region.extents.y1, region.extents.x2, region.extents.y2)); - get_drawable_deltas(drawable, pixmap, &dx, &dy); assert(has_coherent_ptr(sna_pixmap(pixmap))); memcpy_blt(pixmap->devPrivate.ptr, dst, drawable->bitsPerPixel, pixmap->devKind, PixmapBytePad(w, drawable->depth), - region.extents.x1 + dx, - region.extents.y1 + dy, - 0, 0, w, h); - } else - fbGetImage(drawable, x, y, w, h, format, mask, dst); + region.extents.x1, region.extents.y1, 0, 0, w, h); + } else { + region.extents.x1 = x + drawable->x; + region.extents.y1 = y + drawable->y; + region.extents.x2 = region.extents.x1 + w; + region.extents.y2 = region.extents.y1 + h; + region.data = NULL; + + if (sna_drawable_move_region_to_cpu(drawable, &region, flags)) + fbGetImage(drawable, x, y, w, h, format, mask, dst); + } } static void commit c7d246ba6f750ee080c38ccc5603d01fcf7fce92 Author: Chris Wilson <ch...@chris-wilson.co.uk> Date: Sat Jun 29 16:31:34 2013 +0100 sna: Move the clone discard into free-gpu Rather than peppering the discard manually before the call to free the GPU bo, always discard the COW when we actually free the GPU bo. 
Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c index 2666798..2ed5007 100644 --- a/src/sna/sna_accel.c +++ b/src/sna/sna_accel.c @@ -423,6 +423,9 @@ sna_copy_init_blt(struct sna_copy_op *copy, static void sna_pixmap_free_gpu(struct sna *sna, struct sna_pixmap *priv) { assert(priv->gpu_damage == NULL || priv->gpu_bo); + + if (priv->cow) + sna_pixmap_undo_cow(sna, priv, 0); assert(priv->cow == NULL); sna_damage_destroy(&priv->gpu_damage); @@ -2069,8 +2072,6 @@ mark_damage: pixmap->drawable.width, pixmap->drawable.height); assert(priv->gpu_damage == NULL); - if (priv->cow) - sna_pixmap_undo_cow(sna, priv, 0); sna_pixmap_free_gpu(sna, priv); if (priv->flush) { @@ -2083,14 +2084,14 @@ done: if (flags & MOVE_WRITE) { assert(DAMAGE_IS_ALL(priv->cpu_damage)); assert(priv->gpu_damage == NULL); + assert(priv->gpu_bo == NULL || priv->gpu_bo->proxy == NULL); if (priv->cow) sna_pixmap_undo_cow(sna, priv, 0); - priv->source_count = SOURCE_BIAS; - assert(priv->gpu_bo == NULL || priv->gpu_bo->proxy == NULL); - if (priv->gpu_bo && priv->gpu_bo->domain != DOMAIN_GPU) { - DBG(("%s: discarding inactive GPU bo\n", __FUNCTION__)); + if (priv->gpu_bo && priv->gpu_bo->rq == NULL) { + DBG(("%s: discarding idle GPU bo\n", __FUNCTION__)); sna_pixmap_free_gpu(sna, priv); } + priv->source_count = SOURCE_BIAS; } if (priv->cpu_bo) { @@ -2102,10 +2103,6 @@ done: assert(pixmap->devPrivate.ptr == (void *)((unsigned long)priv->cpu_bo->map & ~3)); assert((flags & MOVE_WRITE) == 0 || !kgem_bo_is_busy(priv->cpu_bo)); } - if (flags & MOVE_WRITE) { - DBG(("%s: discarding GPU bo in favour of CPU bo\n", __FUNCTION__)); - sna_pixmap_free_gpu(sna, priv); - } } priv->cpu = (flags & (MOVE_INPLACE_HINT | MOVE_ASYNC_HINT)) == 0 && @@ -2691,11 +2688,8 @@ done: pixmap->drawable.width, pixmap->drawable.height); if (DAMAGE_IS_ALL(priv->cpu_damage)) { - if (priv->gpu_bo) { - DBG(("%s: replaced entire pixmap\n", - __FUNCTION__)); - sna_pixmap_free_gpu(sna, 
priv); - } + DBG(("%s: replaced entire pixmap\n", __FUNCTION__)); + sna_pixmap_free_gpu(sna, priv); } if (priv->flush) { assert(!priv->shm); @@ -4032,8 +4026,6 @@ try_upload_tiled_x(PixmapPtr pixmap, RegionRec *region, if (priv->gpu_bo && (replaces || priv->gpu_bo->proxy)) { DBG(("%s: discarding cached upload proxy\n", __FUNCTION__)); - if (priv->cow) - sna_pixmap_undo_cow(sna, priv, 0); sna_pixmap_free_gpu(sna, priv); } commit 6ab2a3acf71b5204c399c7649e5601c93a99f25f Author: Chris Wilson <ch...@chris-wilson.co.uk> Date: Sat Jun 29 15:04:09 2013 +0100 sna: Improve checks for coherent access through CPU mappings Refactor the CPU mapping tests to a single function, and remember to test for a pending GPU write (i.e. bo->exec). Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> diff --git a/src/sna/kgem.h b/src/sna/kgem.h index d1a391a..c7c7fce 100644 --- a/src/sna/kgem.h +++ b/src/sna/kgem.h @@ -564,6 +564,22 @@ static inline bool kgem_bo_can_map(struct kgem *kgem, struct kgem_bo *bo) return kgem_bo_size(bo) <= kgem->aperture_mappable / 4; } +static inline bool kgem_bo_can_map__cpu(struct kgem *kgem, + struct kgem_bo *bo, + bool write) +{ + if (bo->scanout) + return false; + + if (kgem->has_llc) + return true; + + if (bo->domain != DOMAIN_CPU) + return false; + + return !write || bo->exec == NULL; +} + static inline bool kgem_bo_is_snoop(struct kgem_bo *bo) { assert(bo->refcnt); diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c index 18836c8..2666798 100644 --- a/src/sna/sna_accel.c +++ b/src/sna/sna_accel.c @@ -1403,7 +1403,8 @@ void sna_pixmap_destroy(PixmapPtr pixmap) } static inline bool has_coherent_map(struct sna *sna, - struct kgem_bo *bo) + struct kgem_bo *bo, + unsigned flags) { assert(bo->map); @@ -1413,7 +1414,7 @@ static inline bool has_coherent_map(struct sna *sna, if (bo->tiling != I915_TILING_NONE) return false; - return bo->domain == DOMAIN_CPU || sna->kgem.has_llc; + return kgem_bo_can_map__cpu(&sna->kgem, bo, flags & MOVE_WRITE); } static 
inline bool has_coherent_ptr(struct sna_pixmap *priv) @@ -1437,7 +1438,7 @@ static inline bool has_coherent_ptr(struct sna_pixmap *priv) static inline bool pixmap_inplace(struct sna *sna, PixmapPtr pixmap, struct sna_pixmap *priv, - bool write_only) + unsigned flags) { if (FORCE_INPLACE) return FORCE_INPLACE > 0; @@ -1446,9 +1447,9 @@ static inline bool pixmap_inplace(struct sna *sna, return false; if (priv->mapped) - return has_coherent_map(sna, priv->gpu_bo); + return has_coherent_map(sna, priv->gpu_bo, flags); - if (!write_only && priv->cpu_damage) + if (flags & MOVE_READ && priv->cpu_damage) return false; return (pixmap->devKind * pixmap->drawable.height >> 12) > @@ -1858,7 +1859,7 @@ _sna_pixmap_move_to_cpu(PixmapPtr pixmap, unsigned int flags) if (!priv->mapped) goto skip_inplace_map; - assert(has_coherent_map(sna, priv->gpu_bo)); + assert(has_coherent_map(sna, priv->gpu_bo, flags)); pixmap->devKind = priv->gpu_bo->pitch; assert(priv->gpu_bo->proxy == NULL); @@ -1906,7 +1907,7 @@ skip_inplace_map: assert(priv->gpu_bo == NULL || priv->gpu_bo->proxy == NULL); if (operate_inplace(priv, flags) && - pixmap_inplace(sna, pixmap, priv, (flags & MOVE_READ) == 0) && + pixmap_inplace(sna, pixmap, priv, flags) && sna_pixmap_create_mappable_gpu(pixmap, (flags & MOVE_READ) == 0)) { DBG(("%s: try to operate inplace (GTT)\n", __FUNCTION__)); assert(priv->cow == NULL || (flags & MOVE_WRITE) == 0); @@ -1918,7 +1919,7 @@ skip_inplace_map: pixmap->devPrivate.ptr = kgem_bo_map(&sna->kgem, priv->gpu_bo); priv->mapped = pixmap->devPrivate.ptr != NULL; if (priv->mapped) { - assert(has_coherent_map(sna, priv->gpu_bo)); + assert(has_coherent_map(sna, priv->gpu_bo, flags)); pixmap->devKind = priv->gpu_bo->pitch; if (flags & MOVE_WRITE) { assert(priv->gpu_bo->proxy == NULL); @@ -1946,7 +1947,7 @@ skip_inplace_map: } if (priv->gpu_damage && priv->cpu_damage == NULL && !priv->cow && - (flags & MOVE_READ || priv->gpu_bo->domain == DOMAIN_CPU || sna->kgem.has_llc) && + (flags & MOVE_READ || 
kgem_bo_can_map__cpu(&sna->kgem, priv->gpu_bo, flags & MOVE_WRITE)) && priv->gpu_bo->tiling == I915_TILING_NONE && ((flags & (MOVE_WRITE | MOVE_ASYNC_HINT)) == 0 || !__kgem_bo_is_busy(&sna->kgem, priv->gpu_bo))) { @@ -2161,7 +2162,7 @@ static inline bool region_inplace(struct sna *sna, PixmapPtr pixmap, RegionPtr region, struct sna_pixmap *priv, - bool write_only) + unsigned flags) { assert_pixmap_damage(pixmap); @@ -2171,7 +2172,7 @@ static inline bool region_inplace(struct sna *sna, if (wedged(sna) && !priv->pinned) return false; - if ((priv->cpu || !write_only) && + if ((priv->cpu || flags & MOVE_READ) && region_overlaps_damage(region, priv->cpu_damage, 0, 0)) { DBG(("%s: no, uncovered CPU damage pending\n", __FUNCTION__)); return false; @@ -2184,7 +2185,7 @@ static inline bool region_inplace(struct sna *sna, if (priv->mapped) { DBG(("%s: yes, already mapped, continuiung\n", __FUNCTION__)); - return has_coherent_map(sna, priv->gpu_bo); + return has_coherent_map(sna, priv->gpu_bo, flags); } if (priv->flush) { @@ -2301,7 +2302,7 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable, } if (operate_inplace(priv, flags) && - region_inplace(sna, pixmap, region, priv, (flags & MOVE_READ) == 0) && + region_inplace(sna, pixmap, region, priv, flags) && sna_pixmap_create_mappable_gpu(pixmap, false)) { DBG(("%s: try to operate inplace\n", __FUNCTION__)); assert(priv->cow == NULL || (flags & MOVE_WRITE) == 0); @@ -2312,7 +2313,7 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable, pixmap->devPrivate.ptr = kgem_bo_map(&sna->kgem, priv->gpu_bo); priv->mapped = pixmap->devPrivate.ptr != NULL; if (priv->mapped) { - assert(has_coherent_map(sna, priv->gpu_bo)); + assert(has_coherent_map(sna, priv->gpu_bo, flags)); pixmap->devKind = priv->gpu_bo->pitch; if (flags & MOVE_WRITE) { if (!DAMAGE_IS_ALL(priv->gpu_damage)) { @@ -2359,7 +2360,7 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable, sna_damage_contains_box__no_reduce(priv->gpu_damage, &region->extents)) && 
priv->gpu_bo->tiling == I915_TILING_NONE && - (priv->gpu_bo->domain == DOMAIN_CPU || sna->kgem.has_llc) && + kgem_bo_can_map__cpu(&sna->kgem, priv->gpu_bo, flags & MOVE_WRITE) && ((flags & (MOVE_WRITE | MOVE_ASYNC_HINT)) == 0 || !__kgem_bo_is_busy(&sna->kgem, priv->gpu_bo))) { DBG(("%s: try to operate inplace (CPU), read? %d, write? %d\n", @@ -2370,7 +2371,7 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable, pixmap->devPrivate.ptr = kgem_bo_map__cpu(&sna->kgem, priv->gpu_bo); if (pixmap->devPrivate.ptr != NULL) { - assert(has_coherent_map(sna, priv->gpu_bo)); + assert(has_coherent_map(sna, priv->gpu_bo, flags)); assert(IS_CPU_MAP(priv->gpu_bo->map)); pixmap->devKind = priv->gpu_bo->pitch; priv->cpu = true; @@ -3966,13 +3967,7 @@ static bool can_upload_tiled_x(struct kgem *kgem, struct sna_pixmap *priv) return false; } - if (bo->scanout) { - DBG(("%s: no, is scanout\n", __FUNCTION__, bo->scanout)); - return false; - } - - DBG(("%s? domain=%d, has_llc=%d\n", __FUNCTION__, bo->domain, kgem->has_llc)); - return bo->domain == DOMAIN_CPU || kgem->has_llc; + return kgem_bo_can_map__cpu(kgem, bo, true); } static bool @@ -4972,7 +4967,7 @@ sna_copy_boxes(DrawablePtr src, DrawablePtr dst, GCPtr gc, hint = source_prefer_gpu(sna, src_priv, region, src_dx, src_dy) ?: region_inplace(sna, dst_pixmap, region, - dst_priv, alu_overwrites(alu)); + dst_priv, alu_overwrites(alu) ? 
MOVE_WRITE : MOVE_READ | MOVE_WRITE); if (dst_priv->cpu_damage && alu_overwrites(alu)) { DBG(("%s: overwritting CPU damage\n", __FUNCTION__)); if (region_subsumes_damage(region, dst_priv->cpu_damage)) { @@ -14409,10 +14404,7 @@ sna_get_image_inplace(DrawablePtr drawable, break; } - if (priv->gpu_bo->scanout) - return false; - - if (!sna->kgem.has_llc && priv->gpu_bo->domain != DOMAIN_CPU) + if (!kgem_bo_can_map__cpu(&sna->kgem, priv->gpu_bo, false)) return false; if (priv->gpu_damage == NULL || diff --git a/src/sna/sna_io.c b/src/sna/sna_io.c index 6ab907f..a4932b8 100644 --- a/src/sna/sna_io.c +++ b/src/sna/sna_io.c @@ -53,15 +53,12 @@ static inline bool must_tile(struct sna *sna, int width, int height) upload_too_large(sna, width, height)); } -static bool bo_inplace_tiled(struct kgem *kgem, struct kgem_bo *bo) +static bool bo_inplace_tiled(struct kgem *kgem, struct kgem_bo *bo, bool write) { if (bo->tiling != I915_TILING_X) return false; - if (bo->scanout) - return false; - - return bo->domain == DOMAIN_CPU || kgem->has_llc; + return kgem_bo_can_map__cpu(kgem, bo, write); } static bool download_inplace__tiled(struct kgem *kgem, struct kgem_bo *bo) @@ -69,7 +66,7 @@ static bool download_inplace__tiled(struct kgem *kgem, struct kgem_bo *bo) if (!kgem->memcpy_from_tiled_x) return false; - return bo_inplace_tiled(kgem, bo); + return bo_inplace_tiled(kgem, bo, false); } static bool @@ -537,7 +534,7 @@ static bool upload_inplace__tiled(struct kgem *kgem, struct kgem_bo *bo) if (!kgem->memcpy_to_tiled_x) return false; - return bo_inplace_tiled(kgem, bo); + return bo_inplace_tiled(kgem, bo, true); } static bool commit 9026bb954646c0425360c2236e26c79d097142cd Author: Chris Wilson <ch...@chris-wilson.co.uk> Date: Fri Jun 28 15:59:17 2013 +0100 sna: Inspect the dirty boxes when querying whether damage contains a rectangle This helps in the cases where we have subtracted a small number of rectangles from an all-damage pixmap (such as a number of successive GetImage, PutImage 
operations). The danger is that we end up searching a long list of dirty boxes - maybe just search the first chunk if that becomes noticeable? -- To UNSUBSCRIBE, email to debian-x-requ...@lists.debian.org with a subject of "unsubscribe". Trouble? Contact listmas...@lists.debian.org Archive: http://lists.debian.org/e1uvbsc-0006vd...@vasks.debian.org