Makefile.am | 6 NEWS | 14 benchmarks/.gitignore | 2 benchmarks/Makefile.am | 14 benchmarks/dri2-swap.c | 588 + benchmarks/dri3-swap.c | 595 + configure.ac | 115 debian/changelog | 76 debian/control | 10 debian/patches/fix-fd-leak-when-intel-scrn-create-fails.diff | 117 debian/patches/fix-sna-fstat-include.patch | 55 debian/patches/fix-uxa-fstat-include.patch | 25 debian/patches/fix-yuv-to-rgb-shared-on-intel-gen8.patch | 51 debian/patches/series | 5 debian/patches/sna-let-modesetting-handle-gen9+.diff | 40 debian/rules | 7 debian/xserver-xorg-video-intel.install | 4 debian/xserver-xorg-video-intel.install.kfreebsd-amd64 | 4 debian/xserver-xorg-video-intel.install.kfreebsd-i386 | 4 libobj/alloca.c | 4 man/intel.man | 53 src/backlight.c | 70 src/backlight.h | 2 src/compat-api.h | 27 src/i915_pciids.h | 128 src/intel_device.c | 53 src/intel_driver.h | 1 src/intel_list.h | 38 src/intel_module.c | 79 src/intel_options.c | 91 src/intel_options.h | 7 src/legacy/i810/i810_common.h | 4 src/legacy/i810/i810_hwmc.c | 2 src/legacy/i810/i810_memory.c | 6 src/legacy/i810/i810_reg.h | 2 src/legacy/i810/xvmc/I810XvMC.c | 6 src/render_program/exa_wm.g4i | 2 src/render_program/exa_wm_yuv_rgb.g8a | 4 src/render_program/exa_wm_yuv_rgb.g8b | 2 src/sna/Makefile.am | 2 src/sna/blt.c | 1391 ++- src/sna/brw/brw_eu_emit.c | 2 src/sna/compiler.h | 25 src/sna/fb/fb.h | 4 src/sna/fb/fbpict.h | 4 src/sna/gen2_render.c | 6 src/sna/gen3_render.c | 214 src/sna/gen4_render.c | 60 src/sna/gen5_render.c | 55 src/sna/gen6_common.h | 130 src/sna/gen6_render.c | 133 src/sna/gen7_render.c | 186 src/sna/gen8_render.c | 223 src/sna/gen8_render.h | 1 src/sna/gen9_render.c | 4101 +++++++++++ src/sna/gen9_render.h | 1130 +++ src/sna/kgem.c | 1589 ++-- src/sna/kgem.h | 90 src/sna/kgem_debug_gen4.c | 2 src/sna/kgem_debug_gen5.c | 2 src/sna/kgem_debug_gen6.c | 2 src/sna/sna.h | 130 src/sna/sna_accel.c | 708 + src/sna/sna_blt.c | 221 src/sna/sna_composite.c | 104 src/sna/sna_damage.h | 5 src/sna/sna_display.c | 
3117 ++++++-- src/sna/sna_display_fake.c | 7 src/sna/sna_dri2.c | 2136 +++-- src/sna/sna_dri3.c | 14 src/sna/sna_driver.c | 338 src/sna/sna_glyphs.c | 94 src/sna/sna_io.c | 31 src/sna/sna_present.c | 652 + src/sna/sna_render.c | 103 src/sna/sna_render.h | 67 src/sna/sna_render_inline.h | 6 src/sna/sna_tiling.c | 4 src/sna/sna_trapezoids_boxes.c | 4 src/sna/sna_trapezoids_imprecise.c | 344 src/sna/sna_trapezoids_mono.c | 125 src/sna/sna_trapezoids_precise.c | 344 src/sna/sna_video.c | 66 src/sna/sna_video.h | 25 src/sna/sna_video_overlay.c | 22 src/sna/sna_video_sprite.c | 105 src/sna/sna_video_textured.c | 18 src/sna/xassert.h | 22 src/uxa/i830_reg.h | 6 src/uxa/i965_video.c | 1 src/uxa/intel.h | 17 src/uxa/intel_batchbuffer.c | 11 src/uxa/intel_batchbuffer.h | 19 src/uxa/intel_display.c | 182 src/uxa/intel_dri.c | 484 - src/uxa/intel_driver.c | 38 src/uxa/intel_hwmc.c | 2 src/uxa/intel_memory.c | 2 src/uxa/intel_present.c | 31 src/uxa/intel_uxa.c | 35 test/.gitignore | 6 test/Makefile.am | 12 test/basic-copyplane.c | 99 test/dri2-race.c | 764 +- test/dri2-speed.c | 342 test/dri2-test.c | 122 test/dri3-test.c | 62 test/dri3.c | 34 test/present-race.c | 484 + test/present-speed.c | 669 + test/present-test.c | 1657 ++++ test/render-glyphs.c | 441 + test/render-trapezoid.c | 125 test/render-triangle.c | 180 test/test.h | 9 test/test_image.c | 36 test/xvidmode.c | 54 tools/.gitignore | 4 tools/Makefile.am | 21 tools/backlight_helper.c | 6 tools/cursor.c | 127 tools/dri3info.c | 329 tools/virtual.c | 440 - 123 files changed, 22344 insertions(+), 4984 deletions(-)
New commits: commit 66587129b4bc9db56e464a2a1364071f114e1a64 Author: Vincent Cheng <vch...@debian.org> Date: Sun May 29 23:15:03 2016 -0700 add bpo changelog entry diff --git a/debian/changelog b/debian/changelog index ad04191..d9a9469 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,9 @@ +xserver-xorg-video-intel (2:2.99.917+git20160522-1~bpo8+1) jessie-backports; urgency=medium + + * Rebuild for jessie-backports. + + -- Vincent Cheng <vch...@debian.org> Sun, 29 May 2016 23:14:19 -0700 + xserver-xorg-video-intel (2:2.99.917+git20160522-1) unstable; urgency=medium * New upstream snapshot. (Closes: #823116) commit a1ca6219097af9cefd7561d7cb20349ac4314d6b Author: Timo Aaltonen <tjaal...@debian.org> Date: Tue May 24 11:40:18 2016 +0300 upload to unstable diff --git a/debian/changelog b/debian/changelog index c3c282b..98ceac2 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,9 +1,10 @@ -xserver-xorg-video-intel (2:2.99.917+git20160325-2) UNRELEASED; urgency=medium +xserver-xorg-video-intel (2:2.99.917+git20160522-1) unstable; urgency=medium + * New upstream snapshot. (Closes: #823116) * fix-fd-leak-when-intel-scrn-create-fails.diff: Fix a failure falling back on another driver when scrn create fails. 
- -- Timo Aaltonen <tjaal...@debian.org> Fri, 22 Apr 2016 15:58:10 +0300 + -- Timo Aaltonen <tjaal...@debian.org> Tue, 24 May 2016 11:40:02 +0300 xserver-xorg-video-intel (2:2.99.917+git20160325-1) unstable; urgency=medium commit 8477615ae1bd284aca1221185ffefe0630d3f7ab Author: Chris Wilson <ch...@chris-wilson.co.uk> Date: Sun May 22 14:18:20 2016 +0100 sna: Allow disconnected outputs to retain state without EDID checks Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> diff --git a/src/sna/sna_display.c b/src/sna/sna_display.c index 79c660f..4ddb20e 100644 --- a/src/sna/sna_display.c +++ b/src/sna/sna_display.c @@ -5056,6 +5056,9 @@ output_check_status(struct sna *sna, struct sna_output *output) if (output->status != status) return false; + if (status != XF86OutputStatusConnected) + return true; + if (output->edid_len == 0) return false; commit 25d2c2d049a8c9f2b2ef0895d6079c8b273ad121 Author: Chris Wilson <ch...@chris-wilson.co.uk> Date: Wed May 18 23:26:12 2016 +0100 sna: Confirm the EDID is the same after a hotplug before ignoring Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> diff --git a/src/sna/sna_display.c b/src/sna/sna_display.c index 16d0321..79c660f 100644 --- a/src/sna/sna_display.c +++ b/src/sna/sna_display.c @@ -5021,19 +5021,25 @@ output_check_status(struct sna *sna, struct sna_output *output) { union compat_mode_get_connector compat_conn; struct drm_mode_modeinfo dummy; + struct drm_mode_get_blob blob; xf86OutputStatus status; + char *edid; VG_CLEAR(compat_conn); + compat_conn.conn.connection = -1; compat_conn.conn.connector_id = output->id; compat_conn.conn.count_modes = 1; /* skip detect */ compat_conn.conn.modes_ptr = (uintptr_t)&dummy; compat_conn.conn.count_encoders = 0; - compat_conn.conn.count_props = 0; + compat_conn.conn.props_ptr = (uintptr_t)output->prop_ids; + compat_conn.conn.prop_values_ptr = (uintptr_t)output->prop_values; + compat_conn.conn.count_props = output->num_props; - (void)drmIoctl(sna->kgem.fd, - 
DRM_IOCTL_MODE_GETCONNECTOR, - &compat_conn.conn); + if (drmIoctl(sna->kgem.fd, + DRM_IOCTL_MODE_GETCONNECTOR, + &compat_conn.conn) == 0) + output->update_properties = false; switch (compat_conn.conn.connection) { case DRM_MODE_CONNECTED: @@ -5047,7 +5053,25 @@ output_check_status(struct sna *sna, struct sna_output *output) status = XF86OutputStatusUnknown; break; } - return output->status == status; + if (output->status != status) + return false; + + if (output->edid_len == 0) + return false; + + edid = alloca(output->edid_len); + + VG_CLEAR(blob); + blob.blob_id = output->prop_values[output->edid_idx]; + blob.length = output->edid_len; + blob.data = (uintptr_t)edid; + if (drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_GETPROPBLOB, &blob)) + return false; + + if (blob.length != output->edid_len) + return false; + + return memcmp(edid, output->edid_raw, output->edid_len) == 0; } void sna_mode_discover(struct sna *sna, bool tell) commit a508b11bde9f3119b49b3e0f652587efb9e037af Author: Chris Wilson <ch...@chris-wilson.co.uk> Date: Tue May 17 07:55:03 2016 +0100 sna: Don't skip migration-to-GPU for TearFree In 46caee86db0f ("sna: Fix reporting of errno after setcrtc failure"), the intention was to avoid reporting a fail to migrate whilst wedged for a simple copy from the frontbuffer to TearFree's shadow buffer. However, by skipping the migration, we never flushed any dirt from the CPU buffer prior to doing the TearFree flip. 
References: https://bugs.freedesktop.org/show_bug.cgi?id=95401#c7 References: https://bugs.freedesktop.org/show_bug.cgi?id=95414#c4 Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> diff --git a/src/sna/sna_display.c b/src/sna/sna_display.c index 7976171..16d0321 100644 --- a/src/sna/sna_display.c +++ b/src/sna/sna_display.c @@ -8519,11 +8519,9 @@ static bool move_crtc_to_gpu(struct sna *sna) xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(sna->scrn); int i; - if (sna->flags & SNA_TEAR_FREE) - return true; - for (i = 0; i < sna->mode.num_real_crtc; i++) { struct sna_crtc *crtc = to_sna_crtc(config->crtc[i]); + unsigned hint; assert(crtc); @@ -8539,10 +8537,13 @@ static bool move_crtc_to_gpu(struct sna *sna) if (crtc->shadow_bo) continue; + hint = MOVE_READ | MOVE_ASYNC_HINT | __MOVE_SCANOUT; + if (sna->flags & SNA_TEAR_FREE) + hint |= __MOVE_FORCE; + DBG(("%s: CRTC %d [pipe=%d] requires frontbuffer\n", __FUNCTION__, __sna_crtc_id(crtc), __sna_crtc_pipe(crtc))); - return sna_pixmap_move_to_gpu(sna->front, - MOVE_READ | MOVE_ASYNC_HINT | __MOVE_SCANOUT); + return sna_pixmap_move_to_gpu(sna->front, hint); } return true; commit 48569eb18d125e20aa817549506fc4c1609829c9 Author: Chris Wilson <ch...@chris-wilson.co.uk> Date: Tue May 17 07:45:46 2016 +0100 sna: Track the minimum damage when doing CRTC-local TearFree We avoid having to redraw the entire CRTC's buffer on every flip as we know the contents from the previous flip are still available and only need to invalidate the dirty region. 
Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> diff --git a/src/sna/sna_display.c b/src/sna/sna_display.c index d01e6a4..7976171 100644 --- a/src/sna/sna_display.c +++ b/src/sna/sna_display.c @@ -210,6 +210,7 @@ struct sna_crtc { struct pict_f_transform cursor_to_fb, fb_to_cursor; + RegionRec crtc_damage; uint16_t shadow_bo_width, shadow_bo_height; uint32_t rotation; @@ -2749,6 +2750,7 @@ sna_crtc_damage(xf86CrtcPtr crtc) assert(sna->mode.shadow_damage && sna->mode.shadow_active); damage = DamageRegion(sna->mode.shadow_damage); RegionUnion(damage, damage, &region); + to_sna_crtc(crtc)->crtc_damage = region; DBG(("%s: damage now %dx[(%d, %d), (%d, %d)]\n", __FUNCTION__, @@ -8693,23 +8695,28 @@ void sna_mode_redisplay(struct sna *sna) sigio = sigio_block(); if (!box_empty(&damage.extents)) { if (sna->flags & SNA_TEAR_FREE) { + RegionRec new_damage; struct drm_mode_crtc_page_flip arg; struct kgem_bo *bo; - RegionUninit(&damage); - damage.extents = crtc->bounds; - damage.data = NULL; + RegionNull(&new_damage); + RegionCopy(&new_damage, &damage); bo = sna_crtc->cache_bo; - if (bo == NULL) + if (bo == NULL) { + damage.extents = crtc->bounds; + damage.data = NULL; bo = kgem_create_2d(&sna->kgem, crtc->mode.HDisplay, crtc->mode.VDisplay, crtc->scrn->bitsPerPixel, sna_crtc->bo->tiling, CREATE_SCANOUT); - if (bo == NULL) - continue; + if (bo == NULL) + continue; + } else + RegionUnion(&damage, &damage, &sna_crtc->crtc_damage); + sna_crtc->crtc_damage = new_damage; sna_crtc_redisplay(crtc, &damage, bo); kgem_bo_submit(&sna->kgem, bo); commit ab3ab412a459a95e94707a4e39a18790ee04e6e3 Author: Chris Bainbridge <chris.bainbri...@gmail.com> Date: Tue May 17 07:26:10 2016 +0100 sna: Fix increment of modeset serial after applying CRTC We track how many mode sets have been made in order to detect stale flips (i.e. a sequence that crosses a mode change). 
This was broken by the logic inversion in setcrtc in 46caee86db0f ("sna: Fix reporting of errno after setcrtc failure") References: https://bugs.freedesktop.org/show_bug.cgi?id=95401 Signed-off-by: Chris Bainbridge <chris.bainbri...@gmail.com> Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> diff --git a/src/sna/sna_display.c b/src/sna/sna_display.c index 867a10b..d01e6a4 100644 --- a/src/sna/sna_display.c +++ b/src/sna/sna_display.c @@ -1447,9 +1447,12 @@ sna_crtc_apply(xf86CrtcPtr crtc) ret = 0; if (unlikely(drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_SETCRTC, &arg))) { ret = errno; - sna_crtc->mode_serial++; - sna_crtc_force_outputs_on(crtc); + goto unblock; } + + sna_crtc->mode_serial++; + sna_crtc_force_outputs_on(crtc); + unblock: kmsg_close(&kmsg, ret); sigio_unblock(sigio); commit 34f63f28c8f39e1d64cd159482f86cacbbff05b4 Author: Chris Wilson <ch...@chris-wilson.co.uk> Date: Mon May 16 10:14:04 2016 +0100 sna: Ensure we only cache the local CRTC scanout buffer Double check that we are not about to cache the common, untransformed, shadow buffer for the per-CRTC transformed buffer. 
Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> diff --git a/src/sna/sna_display.c b/src/sna/sna_display.c index 080efbe..867a10b 100644 --- a/src/sna/sna_display.c +++ b/src/sna/sna_display.c @@ -8777,9 +8777,11 @@ disable1: sna_crtc->flip_serial = sna_crtc->mode_serial; sna_crtc->flip_pending = true; - assert_scanout(&sna->kgem, sna_crtc->bo, - crtc->mode.HDisplay, crtc->mode.VDisplay); - sna_crtc->cache_bo = kgem_bo_reference(sna_crtc->bo); + if (sna_crtc->bo != sna->mode.shadow) { + assert_scanout(&sna->kgem, sna_crtc->bo, + crtc->mode.HDisplay, crtc->mode.VDisplay); + sna_crtc->cache_bo = kgem_bo_reference(sna_crtc->bo); + } DBG(("%s: recording flip on CRTC:%d handle=%d, active_scanout=%d, serial=%d\n", __FUNCTION__, __sna_crtc_id(sna_crtc), sna_crtc->flip_bo->handle, sna_crtc->flip_bo->active_scanout, sna_crtc->flip_serial)); } else { commit 3a7d6afd85f85b8b10bf0c08b7b5fa5265624850 Author: Chris Wilson <ch...@chris-wilson.co.uk> Date: Sat May 14 16:13:33 2016 +0100 sna: Do not cache the current crtc bo after performing the setcrtc When doing a SETCRTC as a fallback for a failed pageflip, do not use the then current CRTC bo as the next bo for pageflipping - as then we will render into it prior to flipping and so cause tearing. 
References: https://bugs.freedesktop.org/show_bug.cgi?id=95401 Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> diff --git a/src/sna/sna_display.c b/src/sna/sna_display.c index 2c6059d..080efbe 100644 --- a/src/sna/sna_display.c +++ b/src/sna/sna_display.c @@ -8734,7 +8734,6 @@ void sna_mode_redisplay(struct sna *sna) sna_crtc->bo = kgem_bo_reference(bo); sna_crtc->bo->active_scanout++; - sna_crtc->cache_bo = kgem_bo_reference(bo); } else { BoxRec box; DrawableRec tmp; @@ -8762,10 +8761,10 @@ disable1: __FUNCTION__, __sna_crtc_id(sna_crtc), __sna_crtc_pipe(sna_crtc)); sna_crtc_disable(crtc, false); } - - kgem_bo_destroy(&sna->kgem, bo); - sna_crtc->cache_bo = NULL; } + + kgem_bo_destroy(&sna->kgem, bo); + sna_crtc->cache_bo = NULL; continue; } sna->mode.flip_active++; @@ -8778,6 +8777,8 @@ disable1: sna_crtc->flip_serial = sna_crtc->mode_serial; sna_crtc->flip_pending = true; + assert_scanout(&sna->kgem, sna_crtc->bo, + crtc->mode.HDisplay, crtc->mode.VDisplay); sna_crtc->cache_bo = kgem_bo_reference(sna_crtc->bo); DBG(("%s: recording flip on CRTC:%d handle=%d, active_scanout=%d, serial=%d\n", __FUNCTION__, __sna_crtc_id(sna_crtc), sna_crtc->flip_bo->handle, sna_crtc->flip_bo->active_scanout, sna_crtc->flip_serial)); commit f71447998cc8e22570cd5641bcf008cb68e9f4a3 Author: Chris Wilson <ch...@chris-wilson.co.uk> Date: Sat May 14 09:00:08 2016 +0100 sna/dri2: Refactor open-coded __kgem_bo_is_busy Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> diff --git a/src/sna/sna_dri2.c b/src/sna/sna_dri2.c index ca9251a..59877e9 100644 --- a/src/sna/sna_dri2.c +++ b/src/sna/sna_dri2.c @@ -2553,22 +2553,15 @@ static inline bool rq_is_busy(struct kgem *kgem, struct kgem_bo *bo) if (bo == NULL) return false; - DBG(("%s: handle=%d, domain: %d exec? %d, rq? 
%d\n", __FUNCTION__, - bo->handle, bo->domain, bo->exec != NULL, bo->rq != NULL)); - assert(bo->refcnt); - - if (bo->exec) - return true; - - if (bo->rq == NULL) - return false; - - return __kgem_busy(kgem, bo->handle); + return __kgem_bo_is_busy(kgem, bo); } static bool sna_dri2_blit_complete(struct sna_dri2_event *info) { - if (rq_is_busy(&info->sna->kgem, info->bo)) { + if (!info->bo) + return true; + + if (__kgem_bo_is_busy(&info->sna->kgem, info->bo)) { DBG(("%s: vsync'ed blit is still busy, postponing\n", __FUNCTION__)); if (sna_next_vblank(info)) @@ -2578,10 +2571,9 @@ static bool sna_dri2_blit_complete(struct sna_dri2_event *info) } DBG(("%s: blit finished\n", __FUNCTION__)); - if (info->bo) { - kgem_bo_destroy(&info->sna->kgem, info->bo); - info->bo = NULL; - } + kgem_bo_destroy(&info->sna->kgem, info->bo); + info->bo = NULL; + return true; } commit 3c95efe5f7989d95153f527eb7d2946d3bbc2af1 Author: Chris Wilson <ch...@chris-wilson.co.uk> Date: Sat May 14 08:49:46 2016 +0100 sna/dri2: Force blocking wait if vblank queue fails Whilst waiting for the previous blit to complete, if we fail to queue the vblank to wake up on the next frame, block before replying the blit is complete. 
Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> diff --git a/src/sna/sna_dri2.c b/src/sna/sna_dri2.c index 4ffa7c3..ca9251a 100644 --- a/src/sna/sna_dri2.c +++ b/src/sna/sna_dri2.c @@ -2573,6 +2573,8 @@ static bool sna_dri2_blit_complete(struct sna_dri2_event *info) __FUNCTION__)); if (sna_next_vblank(info)) return false; + + kgem_bo_sync__gtt(&info->sna->kgem, info->bo); } DBG(("%s: blit finished\n", __FUNCTION__)); commit 1486cfdf04b070787074493a30cd698455b016fe Author: Chris Wilson <ch...@chris-wilson.co.uk> Date: Fri May 13 22:53:26 2016 +0100 sna/gen6+: Don't force a switch to BLT if the target bo cannot be blitted Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> diff --git a/src/sna/gen6_common.h b/src/sna/gen6_common.h index 83adc7d..b53ec0c 100644 --- a/src/sna/gen6_common.h +++ b/src/sna/gen6_common.h @@ -52,6 +52,9 @@ inline static bool can_switch_to_blt(struct sna *sna, if (bo && bo->tiling == I915_TILING_Y) return false; + if (bo && !kgem_bo_can_blt(&sna->kgem, bo)) + return false; + if (sna->render_state.gt < 2) return true; commit 512284fd47bc225236e403920647703ea4842666 Author: Chris Wilson <ch...@chris-wilson.co.uk> Date: Mon May 9 21:11:13 2016 +0100 sna/dri2: Enforce swap-limits on stale buffers If the client sends an out-of-date swap request, first make sure that we don't cause an error by chasing a NULL CRTC and secondly force them to wait for a whole vblank before the next swap. Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> diff --git a/src/sna/sna_dri2.c b/src/sna/sna_dri2.c index d3fe18b..4ffa7c3 100644 --- a/src/sna/sna_dri2.c +++ b/src/sna/sna_dri2.c @@ -1617,6 +1617,9 @@ static void fake_swap_complete(struct sna *sna, ClientPtr client, assert(draw); + if (crtc == NULL) + crtc = sna_primary_crtc(sna); + swap = sna_crtc_last_swap(crtc); DBG(("%s(type=%d): draw=%ld, pipe=%d, frame=%lld [msc %lld], tv=%d.%06d\n", __FUNCTION__, type, (long)draw->id, crtc ? 
sna_crtc_pipe(crtc) : -1, @@ -3340,7 +3343,7 @@ sna_dri2_schedule_swap(ClientPtr client, DrawablePtr draw, DRI2BufferPtr front, __FUNCTION__, get_private(front)->pixmap->drawable.serialNumber, get_drawable_pixmap(draw)->drawable.serialNumber)); - goto fake; + goto skip; } if (get_private(back)->stale) { @@ -3484,7 +3487,7 @@ skip: if (!sna_next_vblank(info)) goto fake; - swap_limit(draw, 2); + swap_limit(draw, 1); } else { fake: /* XXX Use a Timer to throttle the client? */ commit 88733a7874f7c9b45da5d612802947a9de12893a Author: Chris Wilson <ch...@chris-wilson.co.uk> Date: Sat May 7 15:36:53 2016 +0100 sna/dri2: Force consideration of the DRI2CopyRegion source as unclean Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> diff --git a/src/sna/sna_dri2.c b/src/sna/sna_dri2.c index bb7070e..d3fe18b 100644 --- a/src/sna/sna_dri2.c +++ b/src/sna/sna_dri2.c @@ -1246,6 +1246,7 @@ __sna_dri2_copy_region(struct sna *sna, DrawablePtr draw, RegionPtr region, src_bo = src_priv->bo; assert(src_bo->refcnt); + kgem_bo_unclean(&sna->kgem, src_bo); if (is_front(src->attachment)) { struct sna_pixmap *priv; commit 08865b0af288e0460c38c2e3ca20a7f9d0311f27 Author: Chris Wilson <ch...@chris-wilson.co.uk> Date: Sat May 7 15:24:28 2016 +0100 sna: Add a special case for fast DRI2CopyRegion and NoAccel Enable copying onto a scanout buffer using a WC mmap - so long as it is X-tiled and no swizzling. 
Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> diff --git a/src/sna/blt.c b/src/sna/blt.c index eced971..ab7bd22 100644 --- a/src/sna/blt.c +++ b/src/sna/blt.c @@ -349,6 +349,71 @@ memcpy_from_tiled_x__swizzle_0(const void *src, void *dst, int bpp, } } +static fast_memcpy void +memcpy_between_tiled_x__swizzle_0(const void *src, void *dst, int bpp, + int32_t src_stride, int32_t dst_stride, + int16_t src_x, int16_t src_y, + int16_t dst_x, int16_t dst_y, + uint16_t width, uint16_t height) +{ + const unsigned tile_width = 512; + const unsigned tile_height = 8; + const unsigned tile_size = 4096; + + const unsigned cpp = bpp / 8; + const unsigned tile_pixels = tile_width / cpp; + const unsigned tile_shift = ffs(tile_pixels) - 1; + const unsigned tile_mask = tile_pixels - 1; + + DBG(("%s(bpp=%d): src=(%d, %d), dst=(%d, %d), size=%dx%d, pitch=%d/%d\n", + __FUNCTION__, bpp, src_x, src_y, dst_x, dst_y, width, height, src_stride, dst_stride)); + assert(src != dst); + assert((dst_x & tile_mask) == (src_x & tile_mask)); + + while (height--) { + unsigned w = width * cpp; + uint8_t *dst_row = dst; + const uint8_t *src_row = src; + + dst_row += dst_y / tile_height * dst_stride * tile_height; + dst_row += (dst_y & (tile_height-1)) * tile_width; + if (dst_x) + dst_row += (dst_x >> tile_shift) * tile_size; + dst_y++; + + src_row += src_y / tile_height * src_stride * tile_height; + src_row += (src_y & (tile_height-1)) * tile_width; + if (src_x) + src_row += (src_x >> tile_shift) * tile_size; + src_y++; + + if (dst_x & tile_mask) { + const unsigned x = (dst_x & tile_mask) * cpp; + const unsigned len = min(tile_width - x, w); + + memcpy(assume_misaligned(dst_row + x, tile_width, x), + assume_misaligned(src_row + x, tile_width, x), + len); + + dst_row += tile_size; + src_row += tile_size; + w -= len; + } + + while (w >= tile_width) { + memcpy(assume_aligned(dst_row, tile_width), + assume_aligned(src_row, tile_width), + tile_width); + dst_row += tile_size; + src_row += tile_size; 
+ w -= tile_width; + } + memcpy(assume_aligned(dst_row, tile_width), + assume_aligned(src_row, tile_width), + w); + } +} + #if defined(sse2) && defined(__x86_64__) sse2 static force_inline void @@ -461,7 +526,7 @@ sse2 static void to_memcpy(uint8_t *dst, const uint8_t *src, unsigned len) while (len >= 64) { to_sse64(dst, src); dst += 64; - src = (const uint8_t *)src + 64; + src += 64; len -= 64; } if (len == 0) @@ -470,22 +535,22 @@ sse2 static void to_memcpy(uint8_t *dst, const uint8_t *src, unsigned len) if (len & 32) { to_sse32(dst, src); dst += 32; - src = (const uint8_t *)src + 32; + src += 32; } if (len & 16) { to_sse16(dst, src); dst += 16; - src = (const uint8_t *)src + 16; + src += 16; } if (len & 8) { *(uint64_t *)dst = *(uint64_t *)src; dst += 8; - src = (const uint8_t *)src + 8; + src += 8; } if (len & 4) { *(uint32_t *)dst = *(uint32_t *)src; dst += 4; - src = (const uint8_t *)src + 4; + src += 4; } memcpy(dst, src, len & 3); } @@ -820,6 +885,86 @@ memcpy_from_tiled_x__swizzle_0__sse2(const void *src, void *dst, int bpp, } } +sse2 static fast_memcpy void +memcpy_between_tiled_x__swizzle_0__sse2(const void *src, void *dst, int bpp, + int32_t src_stride, int32_t dst_stride, + int16_t src_x, int16_t src_y, + int16_t dst_x, int16_t dst_y, + uint16_t width, uint16_t height) +{ + const unsigned tile_width = 512; + const unsigned tile_height = 8; + const unsigned tile_size = 4096; + + const unsigned cpp = bpp / 8; + const unsigned tile_pixels = tile_width / cpp; + const unsigned tile_shift = ffs(tile_pixels) - 1; + const unsigned tile_mask = tile_pixels - 1; + + unsigned ox, lx; + + DBG(("%s(bpp=%d): src=(%d, %d), dst=(%d, %d), size=%dx%d, pitch=%d/%d\n", + __FUNCTION__, bpp, src_x, src_y, dst_x, dst_y, width, height, src_stride, dst_stride)); + assert(src != dst); + + width *= cpp; + dst_stride *= tile_height; + src_stride *= tile_height; + + assert((dst_x & tile_mask) == (src_x & tile_mask)); + if (dst_x & tile_mask) { + ox = (dst_x & tile_mask) * cpp; + lx 
= min(tile_width - ox, width); + assert(lx != 0); + } else + lx = 0; + + if (dst_x) + dst = (uint8_t *)dst + (dst_x >> tile_shift) * tile_size; + if (src_x) + src = (const uint8_t *)src + (src_x >> tile_shift) * tile_size; + + while (height--) { + const uint8_t *src_row; + uint8_t *dst_row; + unsigned w = width; + + dst_row = dst; + dst_row += dst_y / tile_height * dst_stride; + dst_row += (dst_y & (tile_height-1)) * tile_width; + dst_y++; + + src_row = src; + src_row += src_y / tile_height * src_stride; + src_row += (src_y & (tile_height-1)) * tile_width; + src_y++; + + if (lx) { + to_memcpy(dst_row + ox, src_row + ox, lx); + dst_row += tile_size; + src_row += tile_size; + w -= lx; + } + while (w >= tile_width) { + assert(((uintptr_t)dst_row & (tile_width - 1)) == 0); + assert(((uintptr_t)src_row & (tile_width - 1)) == 0); + to_sse128xN(assume_aligned(dst_row, tile_width), + assume_aligned(src_row, tile_width), + tile_width); + dst_row += tile_size; + src_row += tile_size; + w -= tile_width; + } + if (w) { + assert(((uintptr_t)dst_row & (tile_width - 1)) == 0); + assert(((uintptr_t)src_row & (tile_width - 1)) == 0); + to_memcpy(assume_aligned(dst_row, tile_width), + assume_aligned(src_row, tile_width), + w); + } + } +} + #endif #define memcpy_to_tiled_x(swizzle) \ @@ -1100,11 +1245,13 @@ void choose_memcpy_tiled_x(struct kgem *kgem, int swizzling, unsigned cpu) if (cpu & SSE2) { kgem->memcpy_to_tiled_x = memcpy_to_tiled_x__swizzle_0__sse2; kgem->memcpy_from_tiled_x = memcpy_from_tiled_x__swizzle_0__sse2; + kgem->memcpy_between_tiled_x = memcpy_between_tiled_x__swizzle_0__sse2; } else #endif { kgem->memcpy_to_tiled_x = memcpy_to_tiled_x__swizzle_0; kgem->memcpy_from_tiled_x = memcpy_from_tiled_x__swizzle_0; + kgem->memcpy_between_tiled_x = memcpy_between_tiled_x__swizzle_0; } break; case I915_BIT_6_SWIZZLE_9: diff --git a/src/sna/kgem.h b/src/sna/kgem.h index cd07756..ded8f78 100644 --- a/src/sna/kgem.h +++ b/src/sna/kgem.h @@ -113,6 +113,12 @@ enum { 
NUM_MAP_TYPES, }; +typedef void (*memcpy_box_func)(const void *src, void *dst, int bpp, + int32_t src_stride, int32_t dst_stride, + int16_t src_x, int16_t src_y, + int16_t dst_x, int16_t dst_y, + uint16_t width, uint16_t height); + struct kgem { unsigned wedged; int fd; @@ -212,16 +218,9 @@ struct kgem { void (*retire)(struct kgem *kgem); void (*expire)(struct kgem *kgem); - void (*memcpy_to_tiled_x)(const void *src, void *dst, int bpp, - int32_t src_stride, int32_t dst_stride, - int16_t src_x, int16_t src_y, - int16_t dst_x, int16_t dst_y, - uint16_t width, uint16_t height); - void (*memcpy_from_tiled_x)(const void *src, void *dst, int bpp, - int32_t src_stride, int32_t dst_stride, - int16_t src_x, int16_t src_y, - int16_t dst_x, int16_t dst_y, - uint16_t width, uint16_t height); + memcpy_box_func memcpy_to_tiled_x; + memcpy_box_func memcpy_from_tiled_x; + memcpy_box_func memcpy_between_tiled_x; struct kgem_bo *batch_bo; diff --git a/src/sna/sna_render.c b/src/sna/sna_render.c index 5a8df06..f8281e9 100644 --- a/src/sna/sna_render.c +++ b/src/sna/sna_render.c @@ -2298,16 +2298,22 @@ static bool can_copy_cpu(struct sna *sna, struct kgem_bo *src, struct kgem_bo *dst) { - if (src->tiling != dst->tiling) - return false; + DBG(("%s: tiling=%d:%d, pitch=%d:%d, can_map=%d:%d[%d]\n", + __FUNCTION__, + src->tiling, dst->tiling, + src->pitch, dst->pitch, + kgem_bo_can_map__cpu(&sna->kgem, src, false), + kgem_bo_can_map__cpu(&sna->kgem, dst, true), + sna->kgem.has_wc_mmap)); - if (src->pitch != dst->pitch) + if (src->tiling != dst->tiling) return false; if (!kgem_bo_can_map__cpu(&sna->kgem, src, false)) return false; - if (!kgem_bo_can_map__cpu(&sna->kgem, dst, true)) + if (!kgem_bo_can_map__cpu(&sna->kgem, dst, true) && + !sna->kgem.has_wc_mmap) return false; DBG(("%s -- yes, src handle=%d, dst handle=%d\n", __FUNCTION__, src->handle, dst->handle)); @@ -2320,8 +2326,8 @@ memcpy_copy_boxes(struct sna *sna, uint8_t op, const DrawableRec *dst_draw, struct kgem_bo *dst_bo, 
int16_t dx, int16_t dy, const BoxRec *box, int n, unsigned flags) { + memcpy_box_func detile = NULL; void *dst, *src; - bool clipped; if (op != GXcopy) return false; @@ -2329,25 +2335,53 @@ memcpy_copy_boxes(struct sna *sna, uint8_t op, if (src_draw->depth != dst_draw->depth) return false; - clipped = (n > 1 || - box->x1 + dx > 0 || - box->y1 + dy > 0 || - box->x2 + dx < dst_draw->width || - box->y2 + dy < dst_draw->height); - dst = src = NULL; - if (!clipped && can_copy_cpu(sna, src_bo, dst_bo)) { - dst = kgem_bo_map__cpu(&sna->kgem, dst_bo); + if (can_copy_cpu(sna, src_bo, dst_bo)) { + if (src_bo->pitch != dst_bo->pitch || + dx != sx || dy != sy || n > 1 || + box->x1 + dx > 0 || + box->y1 + dy > 0 || + box->x2 + dx < dst_draw->width || + box->y2 + dy < dst_draw->height) { + if (dx != sx) /* not implemented in memcpy yet */ + goto use_gtt; + + switch (dst_bo->tiling) { + default: + case I915_TILING_Y: + goto use_gtt; + + case I915_TILING_X: + detile = sna->kgem.memcpy_between_tiled_x; + if (detile == NULL)