 NEWS                          |   22 +
 configure.ac                  |    2
 src/i965_render.c             |    2
 src/i965_video.c              |    2
 src/intel.h                   |    2
 src/intel_display.c           |    1
 src/intel_driver.c            |   34 --
 src/intel_driver.h            |   17 -
 src/intel_module.c            |   33 +-
 src/legacy/i810/i810_driver.c |   33 --
 src/sna/fb/fbpict.c           |   28 +
 src/sna/gen4_render.c         |  242 +++++++++------
 src/sna/gen5_render.c         |    5
 src/sna/gen7_render.c         |   30 +
 src/sna/gen7_render.h         |  215 ++-----------
 src/sna/kgem.c                |    8
 src/sna/kgem.h                |    2
 src/sna/sna.h                 |    3
 src/sna/sna_accel.c           |  664 ++++++++++++++++++++++++++++++++++--------
 src/sna/sna_display.c         |    3
 src/sna/sna_dri.c             |  117 +++++--
 src/sna/sna_driver.c          |   49 ---
 src/sna/sna_reg.h             |    1
 src/sna/sna_render.c          |    4
 src/sna/sna_render.h          |    4
 src/sna/sna_video_hwmc.c      |    4
 src/sna/sna_video_textured.c  |    2
 27 files changed, 956 insertions(+), 573 deletions(-)
New commits:

commit a8ee1406244d8b8399bf933d6b61bfd14374b5f9
Author: Chris Wilson <ch...@chris-wilson.co.uk>
Date:   Fri Jul 27 09:07:16 2012 +0100

    2.20.2 release

    Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>

diff --git a/NEWS b/NEWS
index 08340f8..7e267a6 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,23 @@
+Release 2.20.2 (2012-07-27)
+===========================
+For the last 9 months, since 2.16.901, we have been shipping a driver that
+does not work on IvyBridge GT1 systems (HD2500 graphics); we were telling
+the GPU to use an invalid number of threads for the pixel shader and this
+in turn caused the GPU to hang.
+
+Also fixed since the last release just a few days ago:
+
+* Support for the gmux backlight controller on Apple laptops
+  https://bugs.freedesktop.org/show_bug.cgi?id=52423
+
+* Fix X -configure not to list this driver as matching any Intel device,
+  just the VGA class devices will do!
+
+* A crash in SNA when repeatedly switching xrandr rotations
+
+* Corruption in SNA observed in kwin on IvyBridge
+  https://bugs.freedesktop.org/show_bug.cgi?id=52473
+
 Release 2.20.1 (2012-07-22)
 ===========================
 A week in, grab the brown paper bags, for it is time to reveal a couple

diff --git a/configure.ac b/configure.ac
index 3cdacdd..9945d5b 100644
--- a/configure.ac
+++ b/configure.ac
@@ -23,7 +23,7 @@
 # Initialize Autoconf
 AC_PREREQ([2.60])
 AC_INIT([xf86-video-intel],
-        [2.20.1],
+        [2.20.2],
         [https://bugs.freedesktop.org/enter_bug.cgi?product=xorg],
         [xf86-video-intel])
 AC_CONFIG_SRCDIR([Makefile.am])

commit bef73cd9279be3438e467981db39c67bc13104f5
Author: Chris Wilson <ch...@chris-wilson.co.uk>
Date:   Thu Jul 26 21:54:33 2012 +0100

    sna/dri: Select the engine before emitting the wait

    So that if we have a flexible WAIT_FOR_EVENT that can go on either
    pipeline, we can choose our preferred pipeline for DRI.

    Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>

diff --git a/src/sna/sna_dri.c b/src/sna/sna_dri.c
index 1daf1c4..8d6c305 100644
--- a/src/sna/sna_dri.c
+++ b/src/sna/sna_dri.c
@@ -546,6 +546,14 @@ sna_dri_copy_to_front(struct sna *sna, DrawablePtr draw, RegionPtr region,
 		}
 	}
 
+	if (!wedged(sna)) {
+		if (sync)
+			sync = sna_pixmap_is_scanout(sna, pixmap);
+
+		sna_dri_select_mode(sna, src_bo, sync);
+	} else
+		sync = false;
+
 	dx = dy = 0;
 	if (draw->type != DRAWABLE_PIXMAP) {
 		WindowPtr win = (WindowPtr)draw;
@@ -569,7 +577,7 @@ sna_dri_copy_to_front(struct sna *sna, DrawablePtr draw, RegionPtr region,
 		region = &clip;
 	}
 
-	if (sync && sna_pixmap_is_scanout(sna, pixmap)) {
+	if (sync) {
 		crtc = sna_covering_crtc(sna->scrn, &clip.extents, NULL);
 		if (crtc)
 			flush = sna_wait_for_scanline(sna, pixmap, crtc,
@@ -595,8 +603,6 @@ sna_dri_copy_to_front(struct sna *sna, DrawablePtr draw, RegionPtr region,
 				      dst_bo, dx, dy,
 				      boxes, n);
 	} else {
-		sna_dri_select_mode(sna, src_bo, flush);
-
 		sna->render.copy_boxes(sna, GXcopy,
 				       (PixmapPtr)draw, src_bo, -draw->x, -draw->y,
 				       pixmap, dst_bo, dx, dy,

commit 1ced4f1ddcf30b518e1760c7aa4a5ed4f934b9f5
Author: Chris Wilson <ch...@chris-wilson.co.uk>
Date:   Thu Jul 26 10:50:31 2012 +0100

    Reduce maximum thread count for IVB GT1 to avoid spontaneous combustion

    Somewhere along the way it seems that IVB GT1 was reduced to only allow
    a maximum of 48 threads, as revealed in the latest bspecs.
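[Editor's note: for readers puzzling over the (48 - 1) in the patch below, the
maximum thread count is a zero-based bitfield shifted into one of the
3DSTATE_PS dwords, so programming 47 permits up to 48 threads. A minimal,
self-contained sketch of that encoding follows; the shift value of 24 and the
helper name are assumptions for illustration, not copied from gen7_render.h.]

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-in for GEN7_PS_MAX_THREADS_SHIFT; the header in the
 * tree is authoritative, 24 is only assumed here. */
#define PS_MAX_THREADS_SHIFT 24

/* The hardware field is zero-based: program N-1 to allow up to N threads. */
static uint32_t ps_max_threads_bits(unsigned int max_threads)
{
	return (uint32_t)(max_threads - 1) << PS_MAX_THREADS_SHIFT;
}

int main(void)
{
	printf("IVB GT1 (48 threads): 0x%08" PRIx32 "\n", ps_max_threads_bits(48));
	printf("previous value (86):  0x%08" PRIx32 "\n", ps_max_threads_bits(86));
	return 0;
}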
    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=52473
    Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>

diff --git a/src/i965_render.c b/src/i965_render.c
index 9d45944..2182df8 100644
--- a/src/i965_render.c
+++ b/src/i965_render.c
@@ -2703,7 +2703,7 @@ gen7_composite_wm_state(intel_screen_private *intel,
 	OUT_BATCH((1 << GEN7_PS_SAMPLER_COUNT_SHIFT) |
 		  (num_surfaces << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
 	OUT_BATCH(0); /* scratch space base offset */
-	OUT_BATCH(((86 - 1) << GEN7_PS_MAX_THREADS_SHIFT) |
+	OUT_BATCH(((48 - 1) << GEN7_PS_MAX_THREADS_SHIFT) |
 		  GEN7_PS_ATTRIBUTE_ENABLE |
 		  GEN7_PS_16_DISPATCH_ENABLE);
 	OUT_BATCH((6 << GEN7_PS_DISPATCH_START_GRF_SHIFT_0));

diff --git a/src/i965_video.c b/src/i965_video.c
index d9350ce..bcd6063 100644
--- a/src/i965_video.c
+++ b/src/i965_video.c
@@ -1658,7 +1658,7 @@ gen7_upload_wm_state(ScrnInfoPtr scrn, Bool is_packed)
 	OUT_BATCH(0); /* scratch space base offset */
 	OUT_BATCH(
-		((86 - 1) << GEN7_PS_MAX_THREADS_SHIFT) |
+		((48 - 1) << GEN7_PS_MAX_THREADS_SHIFT) |
 		GEN7_PS_ATTRIBUTE_ENABLE |
 		GEN7_PS_16_DISPATCH_ENABLE);
 	OUT_BATCH(

diff --git a/src/sna/gen7_render.c b/src/sna/gen7_render.c
index 167a5e6..d06b791 100644
--- a/src/sna/gen7_render.c
+++ b/src/sna/gen7_render.c
@@ -77,7 +77,7 @@ struct gt_info {
 static const struct gt_info gt1_info = {
 	.max_vs_threads = 36,
 	.max_gs_threads = 36,
-	.max_wm_threads = (86-1) << GEN7_PS_MAX_THREADS_SHIFT,
+	.max_wm_threads = (48-1) << GEN7_PS_MAX_THREADS_SHIFT,
 	.urb = { 128, 512, 192 },
 };

commit 8f8f8759111f791ee99adfd87296443fb0e6acad
Author: Chris Wilson <ch...@chris-wilson.co.uk>
Date:   Thu Jul 26 17:39:34 2012 +0100

    sna/gen4: Tweak heuristics for render/blt usage

    Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>

diff --git a/src/sna/gen4_render.c b/src/sna/gen4_render.c
index d62d744..25229e1 100644
--- a/src/sna/gen4_render.c
+++ b/src/sna/gen4_render.c
@@ -1125,7 +1125,7 @@ static bool gen4_rectangle_begin(struct sna *sna,
 static int gen4_get_rectangles__flush(struct sna *sna,
 				      const struct sna_composite_op *op)
 {
-	if (!kgem_check_batch(&sna->kgem, 25))
+	if (!kgem_check_batch(&sna->kgem, (FLUSH_EVERY_VERTEX || op->need_magic_ca_pass) ?
25 : 6)) return 0; if (!kgem_check_reloc_and_exec(&sna->kgem, 1)) return 0; @@ -1145,9 +1145,9 @@ inline static int gen4_get_rectangles(struct sna *sna, start: rem = vertex_space(sna); - if (rem < 3*op->floats_per_vertex) { + if (rem < op->floats_per_rect) { DBG(("flushing vbo for %s: %d < %d\n", - __FUNCTION__, rem, 3*op->floats_per_vertex)); + __FUNCTION__, rem, op->floats_per_rect)); rem = gen4_get_rectangles__flush(sna, op); if (unlikely(rem == 0)) goto flush; @@ -1157,8 +1157,8 @@ start: !gen4_rectangle_begin(sna, op))) goto flush; - if (want > 1 && want * op->floats_per_vertex*3 > rem) - want = rem / (3*op->floats_per_vertex); + if (want > 1 && want * op->floats_per_rect > rem) + want = rem / op->floats_per_rect; sna->render.vertex_index += 3*want; return want; @@ -1173,8 +1173,9 @@ flush: goto start; } -static uint32_t *gen4_composite_get_binding_table(struct sna *sna, - uint16_t *offset) +static uint32_t * +gen4_composite_get_binding_table(struct sna *sna, + uint16_t *offset) { sna->kgem.surface -= sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t); @@ -1288,7 +1289,7 @@ static void gen4_align_vertex(struct sna *sna, const struct sna_composite_op *op) { if (op->floats_per_vertex != sna->render_state.gen4.floats_per_vertex) { - if (sna->render.vertex_size - sna->render.vertex_used < 6*op->floats_per_vertex) + if (sna->render.vertex_size - sna->render.vertex_used < 2*op->floats_per_rect) gen4_vertex_finish(sna); DBG(("aligning vertex: was %d, now %d floats per vertex, %d->%d\n", @@ -1335,8 +1336,8 @@ gen4_emit_pipelined_pointers(struct sna *sna, kernel, blend, op->has_component_alpha, (int)op->dst.format)); sp = SAMPLER_OFFSET(op->src.filter, op->src.repeat, - op->mask.filter, op->mask.repeat, - kernel); + op->mask.filter, op->mask.repeat, + kernel); bp = gen4_get_blend(blend, op->has_component_alpha, op->dst.format); key = op->mask.bo != NULL; @@ -1371,7 +1372,7 @@ gen4_emit_drawing_rectangle(struct sna *sna, const struct sna_composite_op *op) sna->render_state.gen4.drawrect_limit = limit; OUT_BATCH(GEN4_3DSTATE_DRAWING_RECTANGLE | (4 - 2)); - OUT_BATCH(0x00000000); + OUT_BATCH(0); OUT_BATCH(limit); OUT_BATCH(offset); } @@ -1713,11 +1714,14 @@ gen4_render_video(struct sna *sna, tmp.src.filter = SAMPLER_FILTER_BILINEAR; tmp.src.repeat = SAMPLER_EXTEND_PAD; + tmp.src.bo = frame->bo; + tmp.mask.bo = NULL; tmp.u.gen4.wm_kernel = is_planar_fourcc(frame->id) ? 
WM_KERNEL_VIDEO_PLANAR : WM_KERNEL_VIDEO_PACKED; + tmp.u.gen4.ve_id = 1; tmp.is_affine = true; tmp.floats_per_vertex = 3; - tmp.u.gen4.ve_id = 1; + tmp.floats_per_rect = 9; tmp.priv = frame; if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) { @@ -2078,13 +2082,7 @@ picture_is_cpu(PicturePtr picture) if (!picture->pDrawable) return false; - /* If it is a solid, try to use the render paths */ - if (picture->pDrawable->width == 1 && - picture->pDrawable->height == 1 && - picture->repeat) - return false; - - return is_cpu(picture->pDrawable); + return is_cpu(picture->pDrawable) || is_dirty(picture->pDrawable); } static inline bool prefer_blt(struct sna *sna) @@ -2099,7 +2097,7 @@ static inline bool prefer_blt(struct sna *sna) static bool try_blt(struct sna *sna, - PicturePtr source, + PicturePtr dst, PicturePtr src, int width, int height) { if (prefer_blt(sna)) { @@ -2113,8 +2111,15 @@ try_blt(struct sna *sna, return true; } + if (too_large(dst->pDrawable->width, dst->pDrawable->height)) + return true; + + /* The blitter is much faster for solids */ + if (sna_picture_is_solid(src, NULL)) + return true; + /* is the source picture only in cpu memory e.g. a shm pixmap? */ - return picture_is_cpu(source); + return picture_is_cpu(src); } static bool @@ -2144,7 +2149,7 @@ untransformed(PicturePtr p) static bool need_upload(PicturePtr p) { - return p->pDrawable && unattached(p->pDrawable) && untransformed(p); + return p->pDrawable && untransformed(p) && is_cpu(p->pDrawable); } static bool @@ -2234,12 +2239,12 @@ gen4_composite_fallback(struct sna *sna, return false; } - if (!src_fallback) { + if (src_pixmap && !src_fallback) { DBG(("%s: src is already on the GPU, try to use GPU\n", __FUNCTION__)); return false; } - if (mask && !mask_fallback) { + if (mask_pixmap && !mask_fallback) { DBG(("%s: mask is already on the GPU, try to use GPU\n", __FUNCTION__)); return false; @@ -2255,7 +2260,7 @@ gen4_composite_fallback(struct sna *sna, return true; } - if (mask && mask_fallback) { + if (mask_fallback) { DBG(("%s: dst is on the CPU and mask will fallback\n", __FUNCTION__)); return true; @@ -2358,7 +2363,7 @@ gen4_render_composite(struct sna *sna, #endif if (mask == NULL && - try_blt(sna, src, width, height) && + try_blt(sna, dst, src, width, height) && sna_blt_composite(sna, op, src, dst, src_x, src_y, @@ -2464,11 +2469,9 @@ gen4_render_composite(struct sna *sna, else if (tmp->src.is_affine) tmp->prim_emit = gen4_emit_composite_primitive_affine_source; - tmp->mask.filter = SAMPLER_FILTER_NEAREST; - tmp->mask.repeat = SAMPLER_EXTEND_NONE; - tmp->floats_per_vertex = 3 + !tmp->is_affine; } + tmp->floats_per_rect = 3*tmp->floats_per_vertex; tmp->u.gen4.wm_kernel = gen4_choose_composite_kernel(tmp->op, @@ -2705,7 +2708,7 @@ gen4_render_composite_spans_done(struct sna *sna, static bool gen4_check_composite_spans(struct sna *sna, uint8_t op, PicturePtr src, PicturePtr dst, - int16_t width, int16_t height, + int16_t width, int16_t height, unsigned flags) { if ((flags & COMPOSITE_SPANS_RECTILINEAR) == 0) @@ -2717,7 +2720,7 @@ gen4_check_composite_spans(struct sna *sna, if (gen4_composite_fallback(sna, src, NULL, dst)) return false; - if (!is_gpu(dst->pDrawable)) + if (need_tiling(sna, width, height) && !is_gpu(dst->pDrawable)) return false; return true; @@ -2999,9 +3002,8 @@ fallback_blt: extents.x1 + src_dx, extents.y1 + src_dy, extents.x2 - extents.x1, - extents.y2 - extents.y1)) { + extents.y2 - extents.y1)) goto fallback_tiled_dst; - } } else { tmp.src.bo = kgem_bo_reference(src_bo); tmp.src.width = 
src->drawable.width; @@ -3011,10 +3013,9 @@ fallback_blt: tmp.src.scale[1] = 1.f/src->drawable.height; } - tmp.mask.bo = NULL; - tmp.is_affine = true; tmp.floats_per_vertex = 3; + tmp.floats_per_rect = 9; tmp.u.gen4.wm_kernel = WM_KERNEL; tmp.u.gen4.ve_id = 1; @@ -3041,6 +3042,8 @@ fallback_blt: box->x1 + dst_dx, box->y1 + dst_dy); box++; } while (--n); + + gen4_vertex_flush(sna); sna_render_composite_redirect_done(sna, &tmp); kgem_bo_destroy(&sna->kgem, tmp.src.bo); return true; @@ -3115,6 +3118,7 @@ fallback: dst->drawable.bitsPerPixel, op); } + if (dst->drawable.depth == src->drawable.depth) { op->base.dst.format = sna_render_format_for_depth(dst->drawable.depth); op->base.src.pict_format = op->base.dst.format; @@ -3142,10 +3146,9 @@ fallback: op->base.src.filter = SAMPLER_FILTER_NEAREST; op->base.src.repeat = SAMPLER_EXTEND_NONE; - op->base.mask.bo = NULL; - op->base.is_affine = true; op->base.floats_per_vertex = 3; + op->base.floats_per_rect = 9; op->base.u.gen4.wm_kernel = WM_KERNEL; op->base.u.gen4.ve_id = 1; @@ -3155,6 +3158,15 @@ fallback: goto fallback; } + if (kgem_bo_is_dirty(src_bo)) { + if (sna_blt_compare_depth(&src->drawable, &dst->drawable) && + sna_blt_copy(sna, alu, + src_bo, dst_bo, + dst->drawable.bitsPerPixel, + op)) + return true; + } + gen4_copy_bind_surfaces(sna, &op->base); gen4_align_vertex(sna, &op->base); @@ -3296,6 +3308,7 @@ gen4_render_fill_boxes(struct sna *sna, tmp.is_affine = true; tmp.floats_per_vertex = 3; + tmp.floats_per_rect = 9; tmp.u.gen4.wm_kernel = WM_KERNEL; tmp.u.gen4.ve_id = 1; @@ -3315,6 +3328,7 @@ gen4_render_fill_boxes(struct sna *sna, box++; } while (--n); + gen4_vertex_flush(sna); kgem_bo_destroy(&sna->kgem, tmp.src.bo); return true; } @@ -3397,6 +3411,9 @@ gen4_render_fill(struct sna *sna, uint8_t alu, op->base.dst.bo = dst_bo; op->base.dst.x = op->base.dst.y = 0; + op->base.need_magic_ca_pass = 0; + op->base.has_component_alpha = 0; + op->base.src.bo = sna_render_get_solid(sna, sna_rgba_for_color(color, @@ -3410,8 +3427,7 @@ gen4_render_fill(struct sna *sna, uint8_t alu, op->base.is_affine = true; op->base.floats_per_vertex = 3; - op->base.need_magic_ca_pass = 0; - op->base.has_component_alpha = 0; + op->base.floats_per_rect = 9; op->base.u.gen4.wm_kernel = WM_KERNEL; op->base.u.gen4.ve_id = 1; @@ -3498,6 +3514,7 @@ gen4_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo, tmp.is_affine = true; tmp.floats_per_vertex = 3; + tmp.floats_per_rect = 9; tmp.has_component_alpha = 0; tmp.need_magic_ca_pass = false; @@ -3514,8 +3531,7 @@ gen4_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo, gen4_render_fill_rectangle(sna, &tmp, x1, y1, x2 - x1, y2 - y1); - if (sna->render_state.gen4.vertex_offset) - gen4_vertex_flush(sna); + gen4_vertex_flush(sna); kgem_bo_destroy(&sna->kgem, tmp.src.bo); return true; @@ -3538,6 +3554,31 @@ discard_vbo(struct sna *sna) sna->render.vertex_index = 0; } +static void +gen4_render_retire(struct kgem *kgem) +{ + struct sna *sna; + + sna = container_of(kgem, struct sna, kgem); + if (kgem->nbatch == 0 && sna->render.vbo && !kgem_bo_is_busy(sna->render.vbo)) { + DBG(("%s: resetting idle vbo\n", __FUNCTION__)); + sna->render.vertex_used = 0; + sna->render.vertex_index = 0; + } +} + +static void +gen4_render_expire(struct kgem *kgem) +{ + struct sna *sna; + + sna = container_of(kgem, struct sna, kgem); + if (sna->render.vbo && !sna->render.vertex_used) { + DBG(("%s: discarding vbo\n", __FUNCTION__)); + discard_vbo(sna); + } +} + static void gen4_render_reset(struct sna *sna) { 
sna->render_state.gen4.needs_invariant = true; @@ -3807,6 +3848,9 @@ bool gen4_render_init(struct sna *sna) if (!gen4_render_setup(sna)) return false; + sna->kgem.retire = gen4_render_retire; + sna->kgem.expire = gen4_render_expire; + sna->render.composite = gen4_render_composite; #if !NO_COMPOSITE_SPANS sna->render.check_composite_spans = gen4_check_composite_spans; diff --git a/src/sna/gen5_render.c b/src/sna/gen5_render.c index db7eb7b..3af9097 100644 --- a/src/sna/gen5_render.c +++ b/src/sna/gen5_render.c @@ -2096,8 +2096,6 @@ picture_is_cpu(PicturePtr picture) if (!picture->pDrawable) return false; - if (too_large(picture->pDrawable->width, picture->pDrawable->height)) - return true; return is_cpu(picture->pDrawable) || is_dirty(picture->pDrawable); } @@ -2731,7 +2729,8 @@ gen5_render_composite_spans_done(struct sna *sna, static bool gen5_check_composite_spans(struct sna *sna, uint8_t op, PicturePtr src, PicturePtr dst, - int16_t width, int16_t height, unsigned flags) + int16_t width, int16_t height, + unsigned flags) { if ((flags & COMPOSITE_SPANS_RECTILINEAR) == 0) return false; commit c9dd1401615f0ed9492a0c0f547fb37150e013d1 Author: Chris Wilson <ch...@chris-wilson.co.uk> Date: Thu Jul 26 16:31:16 2012 +0100 sna/gen4: Bump thread counts Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> diff --git a/src/sna/gen4_render.c b/src/sna/gen4_render.c index bc37615..d62d744 100644 --- a/src/sna/gen4_render.c +++ b/src/sna/gen4_render.c @@ -80,7 +80,7 @@ #define URB_CS_ENTRIES 0 #define URB_VS_ENTRY_SIZE 1 // each 512-bit row -#define URB_VS_ENTRIES 8 // we needs at least 8 entries +#define URB_VS_ENTRIES 32 // we needs at least 8 entries #define URB_GS_ENTRY_SIZE 0 #define URB_GS_ENTRIES 0 @@ -89,7 +89,7 @@ #define URB_CLIP_ENTRIES 0 #define URB_SF_ENTRY_SIZE 2 -#define URB_SF_ENTRIES 1 +#define URB_SF_ENTRIES 8 /* * this program computes dA/dx and dA/dy for the texture coordinates along @@ -97,10 +97,18 @@ */ #define SF_KERNEL_NUM_GRF 16 -#define SF_MAX_THREADS 2 #define PS_KERNEL_NUM_GRF 32 -#define PS_MAX_THREADS 48 + +static const struct gt_info { + uint32_t max_sf_threads; + uint32_t max_wm_threads; + uint32_t urb_size; +} gen4_gt_info = { + 16, 32, 256, +}, g4x_gt_info = { + 32, 50, 384, +}; static const uint32_t sf_kernel[][4] = { #include "exa_sf.g4b" @@ -3569,34 +3577,35 @@ static uint32_t gen4_create_vs_unit_state(struct sna_static_stream *stream) } static uint32_t gen4_create_sf_state(struct sna_static_stream *stream, + const struct gt_info *info, uint32_t kernel) { - struct gen4_sf_unit_state *sf_state; + struct gen4_sf_unit_state *sf; - sf_state = sna_static_stream_map(stream, sizeof(*sf_state), 32); + sf = sna_static_stream_map(stream, sizeof(*sf), 32); - sf_state->thread0.grf_reg_count = GEN4_GRF_BLOCKS(SF_KERNEL_NUM_GRF); - sf_state->thread0.kernel_start_pointer = kernel >> 6; - sf_state->sf1.single_program_flow = 1; + sf->thread0.grf_reg_count = GEN4_GRF_BLOCKS(SF_KERNEL_NUM_GRF); + sf->thread0.kernel_start_pointer = kernel >> 6; + sf->sf1.single_program_flow = 1; /* scratch space is not used in our kernel */ - sf_state->thread2.scratch_space_base_pointer = 0; - sf_state->thread3.const_urb_entry_read_length = 0; /* no const URBs */ - sf_state->thread3.const_urb_entry_read_offset = 0; /* no const URBs */ - sf_state->thread3.urb_entry_read_length = 1; /* 1 URB per vertex */ + sf->thread2.scratch_space_base_pointer = 0; + sf->thread3.const_urb_entry_read_length = 0; /* no const URBs */ + sf->thread3.const_urb_entry_read_offset = 0; /* no const URBs */ + 
sf->thread3.urb_entry_read_length = 1; /* 1 URB per vertex */ /* don't smash vertex header, read start from dw8 */ - sf_state->thread3.urb_entry_read_offset = 1; - sf_state->thread3.dispatch_grf_start_reg = 3; - sf_state->thread4.max_threads = SF_MAX_THREADS - 1; - sf_state->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1; - sf_state->thread4.nr_urb_entries = URB_SF_ENTRIES; - sf_state->sf5.viewport_transform = false; /* skip viewport */ - sf_state->sf6.cull_mode = GEN4_CULLMODE_NONE; - sf_state->sf6.scissor = 0; - sf_state->sf7.trifan_pv = 2; - sf_state->sf6.dest_org_vbias = 0x8; - sf_state->sf6.dest_org_hbias = 0x8; + sf->thread3.urb_entry_read_offset = 1; + sf->thread3.dispatch_grf_start_reg = 3; + sf->thread4.max_threads = info->max_sf_threads - 1; + sf->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1; + sf->thread4.nr_urb_entries = URB_SF_ENTRIES; + sf->sf5.viewport_transform = false; /* skip viewport */ + sf->sf6.cull_mode = GEN4_CULLMODE_NONE; + sf->sf6.scissor = 0; + sf->sf7.trifan_pv = 2; + sf->sf6.dest_org_vbias = 0x8; + sf->sf6.dest_org_hbias = 0x8; - return sna_static_stream_offsetof(stream, sf_state); + return sna_static_stream_offsetof(stream, sf); } static uint32_t gen4_create_sampler_state(struct sna_static_stream *stream, @@ -3616,47 +3625,48 @@ static uint32_t gen4_create_sampler_state(struct sna_static_stream *stream, return sna_static_stream_offsetof(stream, sampler_state); } -static void gen4_init_wm_state(struct gen4_wm_unit_state *state, +static void gen4_init_wm_state(struct gen4_wm_unit_state *wm, + const struct gt_info *info, bool has_mask, uint32_t kernel, uint32_t sampler) { - state->thread0.grf_reg_count = GEN4_GRF_BLOCKS(PS_KERNEL_NUM_GRF); - state->thread0.kernel_start_pointer = kernel >> 6; + wm->thread0.grf_reg_count = GEN4_GRF_BLOCKS(PS_KERNEL_NUM_GRF); + wm->thread0.kernel_start_pointer = kernel >> 6; - state->thread1.single_program_flow = 0; + wm->thread1.single_program_flow = 0; /* scratch space is not used in our kernel */ - state->thread2.scratch_space_base_pointer = 0; - state->thread2.per_thread_scratch_space = 0; + wm->thread2.scratch_space_base_pointer = 0; + wm->thread2.per_thread_scratch_space = 0; - state->thread3.const_urb_entry_read_length = 0; - state->thread3.const_urb_entry_read_offset = 0; + wm->thread3.const_urb_entry_read_length = 0; + wm->thread3.const_urb_entry_read_offset = 0; - state->thread3.urb_entry_read_offset = 0; + wm->thread3.urb_entry_read_offset = 0; /* wm kernel use urb from 3, see wm_program in compiler module */ - state->thread3.dispatch_grf_start_reg = 3; /* must match kernel */ + wm->thread3.dispatch_grf_start_reg = 3; /* must match kernel */ - state->wm4.sampler_count = 1; /* 1-4 samplers */ + wm->wm4.sampler_count = 1; /* 1-4 samplers */ - state->wm4.sampler_state_pointer = sampler >> 5; - state->wm5.max_threads = PS_MAX_THREADS - 1; - state->wm5.transposed_urb_read = 0; - state->wm5.thread_dispatch_enable = 1; + wm->wm4.sampler_state_pointer = sampler >> 5; + wm->wm5.max_threads = info->max_wm_threads - 1; + wm->wm5.transposed_urb_read = 0; + wm->wm5.thread_dispatch_enable = 1; /* just use 16-pixel dispatch (4 subspans), don't need to change kernel * start point */ - state->wm5.enable_16_pix = 1; - state->wm5.enable_8_pix = 0; - state->wm5.early_depth_test = 1; + wm->wm5.enable_16_pix = 1; + wm->wm5.enable_8_pix = 0; + wm->wm5.early_depth_test = 1; /* Each pair of attributes (src/mask coords) is two URB entries */ if (has_mask) { - state->thread1.binding_table_entry_count = 3; /* 2 tex and fb */ 
- state->thread3.urb_entry_read_length = 4; + wm->thread1.binding_table_entry_count = 3; /* 2 tex and fb */ + wm->thread3.urb_entry_read_length = 4; } else { - state->thread1.binding_table_entry_count = 2; /* 1 tex and fb */ - state->thread3.urb_entry_read_length = 2; + wm->thread1.binding_table_entry_count = 2; /* 1 tex and fb */ + wm->thread3.urb_entry_read_length = 2; } } @@ -3716,9 +3726,15 @@ static bool gen4_render_setup(struct sna *sna) struct gen4_render_state *state = &sna->render_state.gen4; struct sna_static_stream general; struct gen4_wm_unit_state_padded *wm_state; + const struct gt_info *info; uint32_t sf[2], wm[KERNEL_COUNT]; int i, j, k, l, m; + if (sna->kgem.gen == 45) + info = &g4x_gt_info; + else + info = &gen4_gt_info; + sna_static_stream_init(&general); /* Zero pad the start. If you see an offset of 0x0 in the batchbuffer @@ -3744,8 +3760,8 @@ static bool gen4_render_setup(struct sna *sna) state->vs = gen4_create_vs_unit_state(&general); - state->sf[0] = gen4_create_sf_state(&general, sf[0]); - state->sf[1] = gen4_create_sf_state(&general, sf[1]); + state->sf[0] = gen4_create_sf_state(&general, info, sf[0]); + state->sf[1] = gen4_create_sf_state(&general, info, sf[1]); /* Set up the WM states: each filter/extend type for source and mask, per @@ -3769,7 +3785,7 @@ static bool gen4_render_setup(struct sna *sna) k, l); for (m = 0; m < KERNEL_COUNT; m++) { - gen4_init_wm_state(&wm_state->state, + gen4_init_wm_state(&wm_state->state, info, wm_kernels[m].has_mask, wm[m], sampler_state); commit 7f3fdef98c1ab2fa27439c3be9810b7a934017ce Author: Chris Wilson <ch...@chris-wilson.co.uk> Date: Thu Jul 26 15:39:05 2012 +0100 sna/gen7: IVB requires a complete pipeline stall when changing blend modes Similar to how SandyBridge behaves, I had hoped that with IvyBridge they would have made the pipelined operation actually pipelined, but alas. 
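[Editor's note: the fix below also threads a "destination is dirty" flag
through bit 0 of the binding-table offset handed to gen7_emit_state(), which
then tests that bit and masks it off before use. A small self-contained
sketch of the idiom, assuming only that the offset is even so bit 0 is free;
the helper names here are hypothetical, not driver functions.]

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical helpers: an aligned offset never uses bit 0, so it can
 * carry an out-of-band boolean alongside the offset itself. */
static uint16_t pack_offset_dirty(uint16_t offset, bool dirty)
{
	assert((offset & 1) == 0);	/* relies on the offset being even */
	return offset | (dirty ? 1 : 0);
}

static uint16_t unpack_offset(uint16_t packed, bool *dirty)
{
	*dirty = packed & 1;
	return packed & ~1;		/* strip the flag to recover the offset */
}

int main(void)
{
	bool dirty;
	uint16_t offset = unpack_offset(pack_offset_dirty(0x40, true), &dirty);

	printf("offset=0x%x dirty=%d\n", offset, dirty);
	return 0;
}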
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=52473 Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> diff --git a/src/sna/gen7_render.c b/src/sna/gen7_render.c index cf56e42..167a5e6 100644 --- a/src/sna/gen7_render.c +++ b/src/sna/gen7_render.c @@ -1031,7 +1031,7 @@ gen7_emit_state(struct sna *sna, const struct sna_composite_op *op, uint16_t wm_binding_table) { - bool need_stall = false; + bool need_stall; if (sna->render_state.gen7.emit_flush) gen7_emit_pipe_flush(sna); @@ -1042,7 +1042,10 @@ gen7_emit_state(struct sna *sna, gen7_emit_wm(sna, GEN7_KERNEL(op->u.gen7.flags)); gen7_emit_vertex_elements(sna, op); - need_stall |= gen7_emit_binding_table(sna, wm_binding_table); + need_stall = false; + if (wm_binding_table & 1) + need_stall = GEN7_BLEND(op->u.gen7.flags) != NO_BLEND; + need_stall |= gen7_emit_binding_table(sna, wm_binding_table & ~1); need_stall &= gen7_emit_drawing_rectangle(sna, op); if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) { @@ -1787,8 +1790,10 @@ static void gen7_emit_composite_state(struct sna *sna, { uint32_t *binding_table; uint16_t offset; + bool dirty; gen7_get_batch(sna); + dirty = kgem_bo_is_dirty(op->dst.bo); binding_table = gen7_composite_get_binding_table(sna, &offset); @@ -1820,7 +1825,7 @@ static void gen7_emit_composite_state(struct sna *sna, offset = sna->render_state.gen7.surface_table; } - gen7_emit_state(sna, op, offset); + gen7_emit_state(sna, op, offset | dirty); } static void @@ -3329,6 +3334,7 @@ gen7_emit_copy_state(struct sna *sna, offset = sna->render_state.gen7.surface_table; } + assert(GEN7_BLEND(op->u.gen7.flags) == NO_BLEND); gen7_emit_state(sna, op, offset); } @@ -3705,6 +3711,7 @@ gen7_emit_fill_state(struct sna *sna, const struct sna_composite_op *op) { uint32_t *binding_table; uint16_t offset; + bool dirty; /* XXX Render Target Fast Clear * Set RTFC Enable in PS and render a rectangle. 
@@ -3713,6 +3720,7 @@ gen7_emit_fill_state(struct sna *sna, const struct sna_composite_op *op) */ gen7_get_batch(sna); + dirty = kgem_bo_is_dirty(op->dst.bo); binding_table = gen7_composite_get_binding_table(sna, &offset); @@ -3734,7 +3742,7 @@ gen7_emit_fill_state(struct sna *sna, const struct sna_composite_op *op) offset = sna->render_state.gen7.surface_table; } - gen7_emit_state(sna, op, offset); + gen7_emit_state(sna, op, offset | dirty); } static inline bool prefer_blt_fill(struct sna *sna, commit 0938b3df8c25178c8ea0012e1ead1061d03a4e7c Author: Chris Wilson <ch...@chris-wilson.co.uk> Date: Thu Jul 26 15:21:59 2012 +0100 sna/dri: Add an explanatory assertion Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> diff --git a/src/sna/sna_dri.c b/src/sna/sna_dri.c index d647c02..1daf1c4 100644 --- a/src/sna/sna_dri.c +++ b/src/sna/sna_dri.c @@ -397,6 +397,7 @@ static void damage(PixmapPtr pixmap, RegionPtr region) struct sna_pixmap *priv; priv = sna_pixmap(pixmap); + assert(priv != NULL); if (DAMAGE_IS_ALL(priv->gpu_damage)) return; commit c621183466aa55a5938027b702069e792df2272d Author: Chris Wilson <ch...@chris-wilson.co.uk> Date: Thu Jul 26 15:18:56 2012 +0100 sna/dri: Tidy fallback/normal CopyRegion Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> diff --git a/src/sna/sna_dri.c b/src/sna/sna_dri.c index 6a4a454..d647c02 100644 --- a/src/sna/sna_dri.c +++ b/src/sna/sna_dri.c @@ -578,8 +578,6 @@ sna_dri_copy_to_front(struct sna *sna, DrawablePtr draw, RegionPtr region, get_drawable_deltas(draw, pixmap, &dx, &dy); } - sna_dri_select_mode(sna, src_bo, flush); - damage(pixmap, region); if (region) { boxes = REGION_RECTS(region); @@ -596,16 +594,18 @@ sna_dri_copy_to_front(struct sna *sna, DrawablePtr draw, RegionPtr region, dst_bo, dx, dy, boxes, n); } else { + sna_dri_select_mode(sna, src_bo, flush); + sna->render.copy_boxes(sna, GXcopy, (PixmapPtr)draw, src_bo, -draw->x, -draw->y, pixmap, dst_bo, dx, dy, boxes, n, COPY_LAST); - } - DBG(("%s: flushing? %d\n", __FUNCTION__, flush)); - if (flush) { /* STAT! */ - kgem_submit(&sna->kgem); - bo = kgem_get_last_request(&sna->kgem); + DBG(("%s: flushing? %d\n", __FUNCTION__, flush)); + if (flush) { /* STAT! 
*/ + kgem_submit(&sna->kgem); + bo = kgem_get_last_request(&sna->kgem); + } } pixman_region_translate(region, dx, dy); @@ -670,8 +670,6 @@ sna_dri_copy_from_front(struct sna *sna, DrawablePtr draw, RegionPtr region, get_drawable_deltas(draw, pixmap, &dx, &dy); } - sna_dri_select_mode(sna, src_bo, false); - if (region) { boxes = REGION_RECTS(region); n = REGION_NUM_RECTS(region); @@ -688,6 +686,7 @@ sna_dri_copy_from_front(struct sna *sna, DrawablePtr draw, RegionPtr region, dst_bo, -draw->x, -draw->y, boxes, n); } else { + sna_dri_select_mode(sna, src_bo, false); sna->render.copy_boxes(sna, GXcopy, pixmap, src_bo, dx, dy, (PixmapPtr)draw, dst_bo, -draw->x, -draw->y, @@ -730,14 +729,13 @@ sna_dri_copy(struct sna *sna, DrawablePtr draw, RegionPtr region, n = 1; } - sna_dri_select_mode(sna, src_bo, false); - if (wedged(sna)) { sna_dri_copy_fallback(sna, draw->bitsPerPixel, src_bo, 0, 0, dst_bo, 0, 0, boxes, n); } else { + sna_dri_select_mode(sna, src_bo, false); sna->render.copy_boxes(sna, GXcopy, (PixmapPtr)draw, src_bo, 0, 0, (PixmapPtr)draw, dst_bo, 0, 0, commit 52d2491a1bafb979d79bb970027c55788f199acb Author: Chris Wilson <ch...@chris-wilson.co.uk> Date: Thu Jul 26 15:19:14 2012 +0100 sna/video: Protect against attempting to use TexturedVideo whilst wedged Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> diff --git a/src/sna/sna_display.c b/src/sna/sna_display.c -- To UNSUBSCRIBE, email to debian-x-requ...@lists.debian.org with a subject of "unsubscribe". Trouble? Contact listmas...@lists.debian.org Archive: http://lists.debian.org/e1svfv9-0002y9...@vasks.debian.org