ChangeLog | 352 ++++++++++++++++++++++ NEWS | 22 + configure.ac | 2 debian/changelog | 26 + src/i965_render.c | 2 src/i965_video.c | 2 src/intel.h | 2 src/intel_display.c | 1 src/intel_driver.c | 34 -- src/intel_driver.h | 17 - src/intel_module.c | 33 +- src/legacy/i810/i810_driver.c | 33 -- src/sna/fb/fbpict.c | 28 + src/sna/gen4_render.c | 242 +++++++++------ src/sna/gen5_render.c | 5 src/sna/gen7_render.c | 30 + src/sna/gen7_render.h | 215 ++----------- src/sna/kgem.c | 8 src/sna/kgem.h | 2 src/sna/sna.h | 3 src/sna/sna_accel.c | 664 ++++++++++++++++++++++++++++++++++-------- src/sna/sna_display.c | 3 src/sna/sna_dri.c | 117 +++++-- src/sna/sna_driver.c | 49 --- src/sna/sna_reg.h | 1 src/sna/sna_render.c | 4 src/sna/sna_render.h | 4 src/sna/sna_video_hwmc.c | 4 src/sna/sna_video_textured.c | 2 29 files changed, 1326 insertions(+), 581 deletions(-)
New commits: commit 5aac93a711b13b8a33d5345bfab9d208f0ddf8a2 Author: Julien Danjou <jul...@danjou.info> Date: Sat Jul 28 14:09:15 2012 +0200 New upstream release (2.20.2) Signed-off-by: Julien Danjou <jul...@danjou.info> diff --git a/ChangeLog b/ChangeLog index 82dbbf6..588e46d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,355 @@ +commit a8ee1406244d8b8399bf933d6b61bfd14374b5f9 +Author: Chris Wilson <ch...@chris-wilson.co.uk> +Date: Fri Jul 27 09:07:16 2012 +0100 + + 2.20.2 release + + Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> + +commit bef73cd9279be3438e467981db39c67bc13104f5 +Author: Chris Wilson <ch...@chris-wilson.co.uk> +Date: Thu Jul 26 21:54:33 2012 +0100 + + sna/dri: Select the engine before emitting the wait + + So that if we have a flexible WAIT_FOR_EVENT that can go on either + pipeline, we can choose our preferred pipeline for DRI. + + Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> + +commit 1ced4f1ddcf30b518e1760c7aa4a5ed4f934b9f5 +Author: Chris Wilson <ch...@chris-wilson.co.uk> +Date: Thu Jul 26 10:50:31 2012 +0100 + + Reduce maximum thread count for IVB GT1 to avoid spontaneous combustion + + Somewhere along the way it seems that IVB GT1 was reduced to only allow + a maximum of 48 threads, as revealed in the latest bspecs.
+ + Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=52473 + Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> + +commit 8f8f8759111f791ee99adfd87296443fb0e6acad +Author: Chris Wilson <ch...@chris-wilson.co.uk> +Date: Thu Jul 26 17:39:34 2012 +0100 + + sna/gen4: Tweak heuristics for render/blt usage + + Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> + +commit c9dd1401615f0ed9492a0c0f547fb37150e013d1 +Author: Chris Wilson <ch...@chris-wilson.co.uk> +Date: Thu Jul 26 16:31:16 2012 +0100 + + sna/gen4: Bump thread counts + + Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> + +commit 7f3fdef98c1ab2fa27439c3be9810b7a934017ce +Author: Chris Wilson <ch...@chris-wilson.co.uk> +Date: Thu Jul 26 15:39:05 2012 +0100 + + sna/gen7: IVB requires a complete pipeline stall when changing blend modes + + Similar to how SandyBridge behaves, I had hoped that with IvyBridge they + would have made the pipelined operation actually pipelined, but alas. + + Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=52473 + Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> + +commit 0938b3df8c25178c8ea0012e1ead1061d03a4e7c +Author: Chris Wilson <ch...@chris-wilson.co.uk> +Date: Thu Jul 26 15:21:59 2012 +0100 + + sna/dri: Add an explanatory assertion + + Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> + +commit c621183466aa55a5938027b702069e792df2272d +Author: Chris Wilson <ch...@chris-wilson.co.uk> +Date: Thu Jul 26 15:18:56 2012 +0100 + + sna/dri: Tidy fallback/normal CopyRegion + + Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> + +commit 52d2491a1bafb979d79bb970027c55788f199acb +Author: Chris Wilson <ch...@chris-wilson.co.uk> +Date: Thu Jul 26 15:19:14 2012 +0100 + + sna/video: Protect against attempting to use TexturedVideo whilst wedged + + Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> + +commit 8c0e3bbb0c301d0fa4652aa38edd84a9fd6b555e +Author: Chris Wilson <ch...@chris-wilson.co.uk> +Date: Thu Jul 26 14:58:04 2012 +0100 + + sna: Force 
the damage on the DRI pixmap to be flushed to the GPU + + Otherwise nothing will happen if we consider ourselves wedged. + + Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> + +commit f50d7b03b5d0b81b24f0acb4ae555545bbdaa179 +Author: Chris Wilson <ch...@chris-wilson.co.uk> +Date: Thu Jul 26 14:47:44 2012 +0100 + + sna: Only recommend not to create bo for a pixmap whilst wedged + + This allows us to continue to map a GPU bo and operate inplace if we are + forced to create a GPU bo for a compositor. + + Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> + +commit 49403ddd1bd9a23e1b32a10e7d0757ae2897a579 +Author: Chris Wilson <ch...@chris-wilson.co.uk> +Date: Thu Jul 26 14:44:14 2012 +0100 + + sna/dri: Mark a flush required for any new DRI already on exec/dirty lists + + Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> + +commit efe3c8ff48738f2a274e1d4514d32499fc4aadbc +Author: Chris Wilson <ch...@chris-wilson.co.uk> +Date: Thu Jul 26 14:43:33 2012 +0100 + + sna: Allow DRI to force allocation of a GPU bo and backing pixmap + + Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> + +commit 2069384f9f06d3ef9dbb0c3f2c64cac4b24e10fc +Author: Chris Wilson <ch...@chris-wilson.co.uk> +Date: Thu Jul 26 14:15:45 2012 +0100 + + sna/dri: Implement fallback handling for CopyRegion whilst wedged + + Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> + +commit b18143de47e060b67a46d9c68590a2d35df9fca6 +Author: Paul Menzel <paulepan...@users.sourceforge.net> +Date: Thu Jul 26 12:51:57 2012 +0200 + + NEWS: Correct release version: s/2.12.0/2.20.0/ + + Signed-off-by: Paul Menzel <paulepan...@users.sourceforge.net> + Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> + +commit c262d02fb5defe9100df54cf83cc00e11e335745 +Author: Chris Wilson <ch...@chris-wilson.co.uk> +Date: Thu Jul 26 01:12:11 2012 +0100 + + Limit PCI matching to VGA devices + + Fixes X -configure + + Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> + +commit
ad6355311b8b80777bc0fec95f6bf6cd1b4969d9 +Author: Chris Wilson <ch...@chris-wilson.co.uk> +Date: Thu Jul 26 01:02:53 2012 +0100 + + sna: Compile against xorg-1.10 + + Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> + +commit b6c7c490d76c683b7b5c89d20f902603b85bd3bc +Author: Chris Wilson <ch...@chris-wilson.co.uk> +Date: Thu Jul 26 00:48:59 2012 +0100 + + sna: Check for a NULL scanout after DPMS off with shadow enabled + + We may mark the scanout as detached when all outputs are turned off (for + example during rotation) and so in the subsequent block handler we need + to be careful in case we are handling a detached shadow. + + Reported-by: chr....@gmx.net + Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=52514 + Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> + +commit f4acc01bb09f68edbad4c6cc7e04c271c92661c1 +Author: Chris Wilson <ch...@chris-wilson.co.uk> +Date: Wed Jul 25 22:43:32 2012 +0100 + + sna: Restore inplace upload for DRI compositors + + With a DRI compositor we have to flush the output after every request, + which leads to major inefficiencies. This can be mitigated slightly if + we know we will have to upload shortly, which we track using the pixmap + flush flag. 
+ + In particular PutImage is meant to upload inplace to an active DRI + buffer, however this was accidentally dropped in + + commit a253c95ec63b2b075e66ae7380fed6a73469eba5 + Author: Chris Wilson <ch...@chris-wilson.co.uk> + Date: Sun Jul 15 13:32:35 2012 +0100 + + sna: Prefer uploads to be staged in snoopable bo + + Performance of putimage500 on ivb i7-3720qm: + bare: 4610 + gnome-shell: 3000 + patched: 3390 + + Reported-by: Michael Larabel <mich...@phoronix.com> + Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> + +commit 5c969a05ef815b261e157fe8d1172aebfd7f5841 +Author: Chris Wilson <ch...@chris-wilson.co.uk> +Date: Wed Jul 25 20:28:41 2012 +0100 + + sna/gen7: Move the PS thread count definition into the constant struct + + Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> + +commit 901bb618215d65747eb0a8d481c77c1201f69362 +Author: Chris Wilson <ch...@chris-wilson.co.uk> +Date: Wed Jul 25 20:13:15 2012 +0100 + + sna/gen7: Remove duplicated constants + + Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> + +commit 8c5077e4ed055a97bf9deda59c0e9a45e42317ca +Author: Chris Wilson <ch...@chris-wilson.co.uk> +Date: Wed Jul 25 16:59:11 2012 +0100 + + Assume all unknown chipsets are future gen + + I think the likelihood of a new product being launched based on a 8xx + design is remote enough not to worry about. + + Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> + +commit 954d9c6aca792098cb16a46b4ec8d5fc104b15a9 +Author: Chris Wilson <ch...@chris-wilson.co.uk> +Date: Wed Jul 25 15:41:29 2012 +0100 + + sna: Initialise single-shot tile offsets before use + + As noted by the compiler amidst all the noise.
+ + Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> + +commit 484b072072c8297a87940c9e32097923f0a77c8f +Author: Chris Wilson <ch...@chris-wilson.co.uk> +Date: Wed Jul 25 12:22:47 2012 +0100 + + sna: Add 'gmux_backlight' to list of known devices for backwards compatibility + + Reported-by: Austin Lund <austin.l...@gmail.com> + References: https://bugs.freedesktop.org/show_bug.cgi?id=52423 + Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> + +commit 0ab6d7a50d37cf4454577cb8c333d4b8683aa054 +Author: Chris Wilson <ch...@chris-wilson.co.uk> +Date: Wed Jul 25 12:20:36 2012 +0100 + + sna: Prefer platform backlight devices over firmware + + This is in contrast to libbacklight but closer to our original code as + we prefer a known custom backlight controller over the ACPI interfaces. + As only the ACPI interfaces are marked as firmware, and the custom + backlight controllers as platform, we therefore need to prefer platform + backlight devices. + + Reported-by: Austin Lund <austin.l...@gmail.com> + References: https://bugs.freedesktop.org/show_bug.cgi?id=52423 + Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> + +commit b6d82ab07661aba98963f239f9501b50c3910962 +Author: Chris Wilson <ch...@chris-wilson.co.uk> +Date: Wed Jul 25 10:40:07 2012 +0100 + + sna: Reduce ping-pong for Composite with render disabled + + Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> + +commit de707b7dc853a3b315ead9789d5ca541412c99bc +Author: Chris Wilson <ch...@chris-wilson.co.uk> +Date: Wed Jul 25 09:05:46 2012 +0100 + + uxa: Add Apple's gmux to the list of known preferred backlights + + Reported-by: Austin Lund <austin.l...@gmail.com> + Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=52423 + Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> + +commit 3d254e4010d0753f433cfe62c6a7546b02482847 +Author: Chris Wilson <ch...@chris-wilson.co.uk> +Date: Mon Jul 23 23:45:33 2012 +0100 + + sna: Use SETUP_BLT to reduce overheads for tiled BLT + + Signed-off-by: Chris
Wilson <ch...@chris-wilson.co.uk> + +commit 2ed44149eaa27b9632ec83a776f3ee67b0eec7b0 +Author: Chris Wilson <ch...@chris-wilson.co.uk> +Date: Mon Jul 23 23:45:33 2012 +0100 + + sna: Use SETUP_BLT to reduce overheads for stippled BLT + + Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> + +commit c1785aaca2c9347114d28f114ee59ef8206d829b +Author: Chris Wilson <ch...@chris-wilson.co.uk> +Date: Mon Jul 23 23:00:25 2012 +0100 + + sna: Remove a surplus function + + Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> + +commit 554fce8a65b2518cae032f1eadf58830559299c8 +Author: Chris Wilson <ch...@chris-wilson.co.uk> +Date: Mon Jul 23 22:59:16 2012 +0100 + + uxa: Pass the correct parameters to intel_detect_chipset() + + Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> + +commit 40d90dfd8674c255a45b46bbdc09d497af5b3f50 +Author: Chris Wilson <ch...@chris-wilson.co.uk> +Date: Mon Jul 23 21:55:46 2012 +0100 + + intel: Refactor the common chipset detection/override + + Reduce the duplicate messages for which type of chip we by + amalgamating the common code. + + Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> + +commit e3f6c48d18c316899c71b6fc34971039c6f9e5f8 +Author: Chris Wilson <ch...@chris-wilson.co.uk> +Date: Mon Jul 23 14:43:23 2012 +0100 + + sna: Refactor PutImage to avoid calling drawable_gc_flags() too early + + drawable_gc_flags() asserts that the gc has been moved to the CPU prior + to its calls so that it can read the reduced raster operation. + + Reported-by: Zdenek Kabelac <zkabe...@redhat.com> + Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> + +commit 1af26ea4228a9d7768b475b4f9164d2c7620d4fd +Author: Chris Wilson <ch...@chris-wilson.co.uk> +Date: Mon Jul 23 14:39:44 2012 +0100 + + sna: Flesh out tiled operations using the BLT + + Before enabling the RENDER pipeline for this operation, let's just see + what is required to fully use the BLT pipeline as well. 
+ + Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> + +commit ac182a006732525a921a9c539e5ebfb537ad3b52 +Author: Chris Wilson <ch...@chris-wilson.co.uk> +Date: Mon Jul 23 11:05:05 2012 +0100 + + sna: Hold a reference to the full stipple pattern for repeated tiles + + Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> + commit 83f683b47063eab8cfb5037d02133dd977c3fc25 Author: Chris Wilson <ch...@chris-wilson.co.uk> Date: Sun Jul 22 23:20:23 2012 +0100 diff --git a/debian/changelog b/debian/changelog index efe5457..62c5fa0 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,5 +1,6 @@ -xserver-xorg-video-intel (2:2.20.1-1) UNRELEASED; urgency=low +xserver-xorg-video-intel (2:2.20.2-1) experimental; urgency=low + [ Maarten Lankhorst ] * New upstream release: - First official release with sna * Update to 2.20.1 point release: @@ -21,7 +22,10 @@ xserver-xorg-video-intel (2:2.20.1-1) UNRELEASED; urgency=low - Shadow support was dropped from UXA as it was neither complete nor correct, use SNA instead. - -- Maarten Lankhorst <maarten.lankho...@canonical.com> Mon, 16 Jul 2012 16:21:28 +0200 + [ Julien Danjou ] + * New upstream release (2.20.2) + + -- Julien Danjou <a...@debian.org> Sun, 29 Jul 2012 00:06:11 +0200 xserver-xorg-video-intel (2:2.18.0-1+exp1) experimental; urgency=low commit a8ee1406244d8b8399bf933d6b61bfd14374b5f9 Author: Chris Wilson <ch...@chris-wilson.co.uk> Date: Fri Jul 27 09:07:16 2012 +0100 2.20.2 release Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> diff --git a/NEWS b/NEWS index 08340f8..7e267a6 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,23 @@ +Release 2.20.2 (2012-07-27) +=========================== +For the last 9 months, since 2.16.901, we have been shipping a driver that +does not work on IvyBridge GT1 systems (HD2500 graphics); we were telling +the GPU to use an invalid number of threads for the pixel shader and this +in turn caused the GPU to hang.
+ +Also fixed since the last release just a few days ago: + +* Support for the gmux backlight controller on Apple laptops + https://bugs.freedesktop.org/show_bug.cgi?id=52423 + +* Fix X -configure not to list this driver as matching any Intel device, + just the VGA class devices will do! + +* A crash in SNA when repeatedly switching xrandr rotations + +* Corruption in SNA observed in kwin on IvyBridge + https://bugs.freedesktop.org/show_bug.cgi?id=52473 + Release 2.20.1 (2012-07-22) =========================== A week in, grab the brown paper bags, for it is time to reveal a couple diff --git a/configure.ac b/configure.ac index 3cdacdd..9945d5b 100644 --- a/configure.ac +++ b/configure.ac @@ -23,7 +23,7 @@ # Initialize Autoconf AC_PREREQ([2.60]) AC_INIT([xf86-video-intel], - [2.20.1], + [2.20.2], [https://bugs.freedesktop.org/enter_bug.cgi?product=xorg], [xf86-video-intel]) AC_CONFIG_SRCDIR([Makefile.am]) commit bef73cd9279be3438e467981db39c67bc13104f5 Author: Chris Wilson <ch...@chris-wilson.co.uk> Date: Thu Jul 26 21:54:33 2012 +0100 sna/dri: Select the engine before emitting the wait So that if we have a flexible WAIT_FOR_EVENT that can go on either pipeline, we can choose our preferred pipeline for DRI. 
Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> diff --git a/src/sna/sna_dri.c b/src/sna/sna_dri.c index 1daf1c4..8d6c305 100644 --- a/src/sna/sna_dri.c +++ b/src/sna/sna_dri.c @@ -546,6 +546,14 @@ sna_dri_copy_to_front(struct sna *sna, DrawablePtr draw, RegionPtr region, } } + if (!wedged(sna)) { + if (sync) + sync = sna_pixmap_is_scanout(sna, pixmap); + + sna_dri_select_mode(sna, src_bo, sync); + } else + sync = false; + dx = dy = 0; if (draw->type != DRAWABLE_PIXMAP) { WindowPtr win = (WindowPtr)draw; @@ -569,7 +577,7 @@ sna_dri_copy_to_front(struct sna *sna, DrawablePtr draw, RegionPtr region, region = &clip; } - if (sync && sna_pixmap_is_scanout(sna, pixmap)) { + if (sync) { crtc = sna_covering_crtc(sna->scrn, &clip.extents, NULL); if (crtc) flush = sna_wait_for_scanline(sna, pixmap, crtc, @@ -595,8 +603,6 @@ sna_dri_copy_to_front(struct sna *sna, DrawablePtr draw, RegionPtr region, dst_bo, dx, dy, boxes, n); } else { - sna_dri_select_mode(sna, src_bo, flush); - sna->render.copy_boxes(sna, GXcopy, (PixmapPtr)draw, src_bo, -draw->x, -draw->y, pixmap, dst_bo, dx, dy, commit 1ced4f1ddcf30b518e1760c7aa4a5ed4f934b9f5 Author: Chris Wilson <ch...@chris-wilson.co.uk> Date: Thu Jul 26 10:50:31 2012 +0100 Reduce maximum thread count for IVB GT1 to avoid spontaneous combustion Somewhere along the way it seems that IVB GT1 was reduced to only allow a maximum of 48 threads, as revealed in the lastest bspecs. 
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=52473 Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> diff --git a/src/i965_render.c b/src/i965_render.c index 9d45944..2182df8 100644 --- a/src/i965_render.c +++ b/src/i965_render.c @@ -2703,7 +2703,7 @@ gen7_composite_wm_state(intel_screen_private *intel, OUT_BATCH((1 << GEN7_PS_SAMPLER_COUNT_SHIFT) | (num_surfaces << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); OUT_BATCH(0); /* scratch space base offset */ - OUT_BATCH(((86 - 1) << GEN7_PS_MAX_THREADS_SHIFT) | + OUT_BATCH(((48 - 1) << GEN7_PS_MAX_THREADS_SHIFT) | GEN7_PS_ATTRIBUTE_ENABLE | GEN7_PS_16_DISPATCH_ENABLE); OUT_BATCH((6 << GEN7_PS_DISPATCH_START_GRF_SHIFT_0)); diff --git a/src/i965_video.c b/src/i965_video.c index d9350ce..bcd6063 100644 --- a/src/i965_video.c +++ b/src/i965_video.c @@ -1658,7 +1658,7 @@ gen7_upload_wm_state(ScrnInfoPtr scrn, Bool is_packed) OUT_BATCH(0); /* scratch space base offset */ OUT_BATCH( - ((86 - 1) << GEN7_PS_MAX_THREADS_SHIFT) | + ((48 - 1) << GEN7_PS_MAX_THREADS_SHIFT) | GEN7_PS_ATTRIBUTE_ENABLE | GEN7_PS_16_DISPATCH_ENABLE); OUT_BATCH( diff --git a/src/sna/gen7_render.c b/src/sna/gen7_render.c index 167a5e6..d06b791 100644 --- a/src/sna/gen7_render.c +++ b/src/sna/gen7_render.c @@ -77,7 +77,7 @@ struct gt_info { static const struct gt_info gt1_info = { .max_vs_threads = 36, .max_gs_threads = 36, - .max_wm_threads = (86-1) << GEN7_PS_MAX_THREADS_SHIFT, + .max_wm_threads = (48-1) << GEN7_PS_MAX_THREADS_SHIFT, .urb = { 128, 512, 192 }, }; commit 8f8f8759111f791ee99adfd87296443fb0e6acad Author: Chris Wilson <ch...@chris-wilson.co.uk> Date: Thu Jul 26 17:39:34 2012 +0100 sna/gen4: Tweak heuristics for render/blt usage Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> diff --git a/src/sna/gen4_render.c b/src/sna/gen4_render.c index d62d744..25229e1 100644 --- a/src/sna/gen4_render.c +++ b/src/sna/gen4_render.c @@ -1125,7 +1125,7 @@ static bool gen4_rectangle_begin(struct sna *sna, static int 
gen4_get_rectangles__flush(struct sna *sna, const struct sna_composite_op *op) { - if (!kgem_check_batch(&sna->kgem, 25)) + if (!kgem_check_batch(&sna->kgem, (FLUSH_EVERY_VERTEX || op->need_magic_ca_pass) ? 25 : 6)) return 0; if (!kgem_check_reloc_and_exec(&sna->kgem, 1)) return 0; @@ -1145,9 +1145,9 @@ inline static int gen4_get_rectangles(struct sna *sna, start: rem = vertex_space(sna); - if (rem < 3*op->floats_per_vertex) { + if (rem < op->floats_per_rect) { DBG(("flushing vbo for %s: %d < %d\n", - __FUNCTION__, rem, 3*op->floats_per_vertex)); + __FUNCTION__, rem, op->floats_per_rect)); rem = gen4_get_rectangles__flush(sna, op); if (unlikely(rem == 0)) goto flush; @@ -1157,8 +1157,8 @@ start: !gen4_rectangle_begin(sna, op))) goto flush; - if (want > 1 && want * op->floats_per_vertex*3 > rem) - want = rem / (3*op->floats_per_vertex); + if (want > 1 && want * op->floats_per_rect > rem) + want = rem / op->floats_per_rect; sna->render.vertex_index += 3*want; return want; @@ -1173,8 +1173,9 @@ flush: goto start; } -static uint32_t *gen4_composite_get_binding_table(struct sna *sna, - uint16_t *offset) +static uint32_t * +gen4_composite_get_binding_table(struct sna *sna, + uint16_t *offset) { sna->kgem.surface -= sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t); @@ -1288,7 +1289,7 @@ static void gen4_align_vertex(struct sna *sna, const struct sna_composite_op *op) { if (op->floats_per_vertex != sna->render_state.gen4.floats_per_vertex) { - if (sna->render.vertex_size - sna->render.vertex_used < 6*op->floats_per_vertex) + if (sna->render.vertex_size - sna->render.vertex_used < 2*op->floats_per_rect) gen4_vertex_finish(sna); DBG(("aligning vertex: was %d, now %d floats per vertex, %d->%d\n", @@ -1335,8 +1336,8 @@ gen4_emit_pipelined_pointers(struct sna *sna, kernel, blend, op->has_component_alpha, (int)op->dst.format)); sp = SAMPLER_OFFSET(op->src.filter, op->src.repeat, - op->mask.filter, op->mask.repeat, - kernel); + op->mask.filter, op->mask.repeat, + 
kernel); bp = gen4_get_blend(blend, op->has_component_alpha, op->dst.format); key = op->mask.bo != NULL; @@ -1371,7 +1372,7 @@ gen4_emit_drawing_rectangle(struct sna *sna, const struct sna_composite_op *op) sna->render_state.gen4.drawrect_limit = limit; OUT_BATCH(GEN4_3DSTATE_DRAWING_RECTANGLE | (4 - 2)); - OUT_BATCH(0x00000000); + OUT_BATCH(0); OUT_BATCH(limit); OUT_BATCH(offset); } @@ -1713,11 +1714,14 @@ gen4_render_video(struct sna *sna, tmp.src.filter = SAMPLER_FILTER_BILINEAR; tmp.src.repeat = SAMPLER_EXTEND_PAD; + tmp.src.bo = frame->bo; + tmp.mask.bo = NULL; tmp.u.gen4.wm_kernel = is_planar_fourcc(frame->id) ? WM_KERNEL_VIDEO_PLANAR : WM_KERNEL_VIDEO_PACKED; + tmp.u.gen4.ve_id = 1; tmp.is_affine = true; tmp.floats_per_vertex = 3; - tmp.u.gen4.ve_id = 1; + tmp.floats_per_rect = 9; tmp.priv = frame; if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) { @@ -2078,13 +2082,7 @@ picture_is_cpu(PicturePtr picture) if (!picture->pDrawable) return false; - /* If it is a solid, try to use the render paths */ - if (picture->pDrawable->width == 1 && - picture->pDrawable->height == 1 && - picture->repeat) - return false; - - return is_cpu(picture->pDrawable); + return is_cpu(picture->pDrawable) || is_dirty(picture->pDrawable); } static inline bool prefer_blt(struct sna *sna) @@ -2099,7 +2097,7 @@ static inline bool prefer_blt(struct sna *sna) static bool try_blt(struct sna *sna, - PicturePtr source, + PicturePtr dst, PicturePtr src, int width, int height) { if (prefer_blt(sna)) { @@ -2113,8 +2111,15 @@ try_blt(struct sna *sna, return true; } + if (too_large(dst->pDrawable->width, dst->pDrawable->height)) + return true; + + /* The blitter is much faster for solids */ + if (sna_picture_is_solid(src, NULL)) + return true; + /* is the source picture only in cpu memory e.g. a shm pixmap? 
*/ - return picture_is_cpu(source); + return picture_is_cpu(src); } static bool @@ -2144,7 +2149,7 @@ untransformed(PicturePtr p) static bool need_upload(PicturePtr p) { - return p->pDrawable && unattached(p->pDrawable) && untransformed(p); + return p->pDrawable && untransformed(p) && is_cpu(p->pDrawable); } static bool @@ -2234,12 +2239,12 @@ gen4_composite_fallback(struct sna *sna, return false; } - if (!src_fallback) { + if (src_pixmap && !src_fallback) { DBG(("%s: src is already on the GPU, try to use GPU\n", __FUNCTION__)); return false; } - if (mask && !mask_fallback) { + if (mask_pixmap && !mask_fallback) { DBG(("%s: mask is already on the GPU, try to use GPU\n", __FUNCTION__)); return false; @@ -2255,7 +2260,7 @@ gen4_composite_fallback(struct sna *sna, return true; } - if (mask && mask_fallback) { + if (mask_fallback) { DBG(("%s: dst is on the CPU and mask will fallback\n", __FUNCTION__)); return true; @@ -2358,7 +2363,7 @@ gen4_render_composite(struct sna *sna, #endif if (mask == NULL && - try_blt(sna, src, width, height) && + try_blt(sna, dst, src, width, height) && sna_blt_composite(sna, op, src, dst, src_x, src_y, @@ -2464,11 +2469,9 @@ gen4_render_composite(struct sna *sna, else if (tmp->src.is_affine) tmp->prim_emit = gen4_emit_composite_primitive_affine_source; - tmp->mask.filter = SAMPLER_FILTER_NEAREST; - tmp->mask.repeat = SAMPLER_EXTEND_NONE; - tmp->floats_per_vertex = 3 + !tmp->is_affine; } + tmp->floats_per_rect = 3*tmp->floats_per_vertex; tmp->u.gen4.wm_kernel = gen4_choose_composite_kernel(tmp->op, @@ -2705,7 +2708,7 @@ gen4_render_composite_spans_done(struct sna *sna, static bool gen4_check_composite_spans(struct sna *sna, uint8_t op, PicturePtr src, PicturePtr dst, - int16_t width, int16_t height, + int16_t width, int16_t height, unsigned flags) { if ((flags & COMPOSITE_SPANS_RECTILINEAR) == 0) @@ -2717,7 +2720,7 @@ gen4_check_composite_spans(struct sna *sna, if (gen4_composite_fallback(sna, src, NULL, dst)) return false; - if 
(!is_gpu(dst->pDrawable)) + if (need_tiling(sna, width, height) && !is_gpu(dst->pDrawable)) return false; return true; @@ -2999,9 +3002,8 @@ fallback_blt: extents.x1 + src_dx, extents.y1 + src_dy, extents.x2 - extents.x1, - extents.y2 - extents.y1)) { + extents.y2 - extents.y1)) goto fallback_tiled_dst; - } } else { tmp.src.bo = kgem_bo_reference(src_bo); tmp.src.width = src->drawable.width; @@ -3011,10 +3013,9 @@ fallback_blt: tmp.src.scale[1] = 1.f/src->drawable.height; } - tmp.mask.bo = NULL; - tmp.is_affine = true; tmp.floats_per_vertex = 3; + tmp.floats_per_rect = 9; tmp.u.gen4.wm_kernel = WM_KERNEL; tmp.u.gen4.ve_id = 1; @@ -3041,6 +3042,8 @@ fallback_blt: box->x1 + dst_dx, box->y1 + dst_dy); box++; } while (--n); + + gen4_vertex_flush(sna); sna_render_composite_redirect_done(sna, &tmp); kgem_bo_destroy(&sna->kgem, tmp.src.bo); return true; @@ -3115,6 +3118,7 @@ fallback: dst->drawable.bitsPerPixel, op); } + if (dst->drawable.depth == src->drawable.depth) { op->base.dst.format = sna_render_format_for_depth(dst->drawable.depth); op->base.src.pict_format = op->base.dst.format; @@ -3142,10 +3146,9 @@ fallback: op->base.src.filter = SAMPLER_FILTER_NEAREST; op->base.src.repeat = SAMPLER_EXTEND_NONE; - op->base.mask.bo = NULL; - op->base.is_affine = true; op->base.floats_per_vertex = 3; + op->base.floats_per_rect = 9; op->base.u.gen4.wm_kernel = WM_KERNEL; op->base.u.gen4.ve_id = 1; @@ -3155,6 +3158,15 @@ fallback: goto fallback; } + if (kgem_bo_is_dirty(src_bo)) { + if (sna_blt_compare_depth(&src->drawable, &dst->drawable) && + sna_blt_copy(sna, alu, + src_bo, dst_bo, + dst->drawable.bitsPerPixel, + op)) + return true; + } + gen4_copy_bind_surfaces(sna, &op->base); gen4_align_vertex(sna, &op->base); @@ -3296,6 +3308,7 @@ gen4_render_fill_boxes(struct sna *sna, tmp.is_affine = true; tmp.floats_per_vertex = 3; + tmp.floats_per_rect = 9; tmp.u.gen4.wm_kernel = WM_KERNEL; tmp.u.gen4.ve_id = 1; @@ -3315,6 +3328,7 @@ gen4_render_fill_boxes(struct sna *sna, box++; } 
while (--n); + gen4_vertex_flush(sna); kgem_bo_destroy(&sna->kgem, tmp.src.bo); return true; } @@ -3397,6 +3411,9 @@ gen4_render_fill(struct sna *sna, uint8_t alu, op->base.dst.bo = dst_bo; op->base.dst.x = op->base.dst.y = 0; + op->base.need_magic_ca_pass = 0; + op->base.has_component_alpha = 0; + op->base.src.bo = sna_render_get_solid(sna, sna_rgba_for_color(color, @@ -3410,8 +3427,7 @@ gen4_render_fill(struct sna *sna, uint8_t alu, op->base.is_affine = true; op->base.floats_per_vertex = 3; - op->base.need_magic_ca_pass = 0; - op->base.has_component_alpha = 0; + op->base.floats_per_rect = 9; op->base.u.gen4.wm_kernel = WM_KERNEL; op->base.u.gen4.ve_id = 1; @@ -3498,6 +3514,7 @@ gen4_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo, tmp.is_affine = true; tmp.floats_per_vertex = 3; + tmp.floats_per_rect = 9; tmp.has_component_alpha = 0; tmp.need_magic_ca_pass = false; @@ -3514,8 +3531,7 @@ gen4_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo, gen4_render_fill_rectangle(sna, &tmp, x1, y1, x2 - x1, y2 - y1); - if (sna->render_state.gen4.vertex_offset) - gen4_vertex_flush(sna); + gen4_vertex_flush(sna); kgem_bo_destroy(&sna->kgem, tmp.src.bo); return true; @@ -3538,6 +3554,31 @@ discard_vbo(struct sna *sna) sna->render.vertex_index = 0; } +static void +gen4_render_retire(struct kgem *kgem) +{ + struct sna *sna; + + sna = container_of(kgem, struct sna, kgem); + if (kgem->nbatch == 0 && sna->render.vbo && !kgem_bo_is_busy(sna->render.vbo)) { + DBG(("%s: resetting idle vbo\n", __FUNCTION__)); + sna->render.vertex_used = 0; + sna->render.vertex_index = 0; + } +} + +static void +gen4_render_expire(struct kgem *kgem) +{ + struct sna *sna; + + sna = container_of(kgem, struct sna, kgem); + if (sna->render.vbo && !sna->render.vertex_used) { + DBG(("%s: discarding vbo\n", __FUNCTION__)); + discard_vbo(sna); + } +} + static void gen4_render_reset(struct sna *sna) { sna->render_state.gen4.needs_invariant = true; @@ -3807,6 +3848,9 @@ bool 
gen4_render_init(struct sna *sna) if (!gen4_render_setup(sna)) return false; + sna->kgem.retire = gen4_render_retire; + sna->kgem.expire = gen4_render_expire; + sna->render.composite = gen4_render_composite; #if !NO_COMPOSITE_SPANS sna->render.check_composite_spans = gen4_check_composite_spans; diff --git a/src/sna/gen5_render.c b/src/sna/gen5_render.c index db7eb7b..3af9097 100644 --- a/src/sna/gen5_render.c +++ b/src/sna/gen5_render.c @@ -2096,8 +2096,6 @@ picture_is_cpu(PicturePtr picture) if (!picture->pDrawable) return false; - if (too_large(picture->pDrawable->width, picture->pDrawable->height)) - return true; return is_cpu(picture->pDrawable) || is_dirty(picture->pDrawable); } @@ -2731,7 +2729,8 @@ gen5_render_composite_spans_done(struct sna *sna, static bool gen5_check_composite_spans(struct sna *sna, uint8_t op, PicturePtr src, PicturePtr dst, - int16_t width, int16_t height, unsigned flags) + int16_t width, int16_t height, + unsigned flags) { if ((flags & COMPOSITE_SPANS_RECTILINEAR) == 0) return false; commit c9dd1401615f0ed9492a0c0f547fb37150e013d1 Author: Chris Wilson <ch...@chris-wilson.co.uk> Date: Thu Jul 26 16:31:16 2012 +0100 sna/gen4: Bump thread counts Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> diff --git a/src/sna/gen4_render.c b/src/sna/gen4_render.c index bc37615..d62d744 100644 --- a/src/sna/gen4_render.c +++ b/src/sna/gen4_render.c @@ -80,7 +80,7 @@ #define URB_CS_ENTRIES 0 #define URB_VS_ENTRY_SIZE 1 // each 512-bit row -#define URB_VS_ENTRIES 8 // we needs at least 8 entries +#define URB_VS_ENTRIES 32 // we needs at least 8 entries #define URB_GS_ENTRY_SIZE 0 #define URB_GS_ENTRIES 0 -- To UNSUBSCRIBE, email to debian-x-requ...@lists.debian.org with a subject of "unsubscribe". Trouble? Contact listmas...@lists.debian.org Archive: http://lists.debian.org/e1svl4t-0004db...@vasks.debian.org