debian/changelog | 1 debian/patches/0001-i965-fs-Remove-creation-of-a-MOV-instruction-that-s-.patch | 29 debian/patches/0002-i965-fs-Move-varying-uniform-offset-compuation-into-.patch | 90 +++ debian/patches/0003-i965-Make-the-constant-surface-interface-take-a-norm.patch | 141 ++++ debian/patches/0004-i965-Make-the-fragment-shader-pull-constants-index-b.patch | 162 +++++ debian/patches/0005-i965-fs-Avoid-inappropriate-optimization-with-regs_w.patch | 34 + debian/patches/0006-i965-fs-Improve-performance-of-varying-index-uniform.patch | 127 ++++ debian/patches/0007-i965-fs-Do-CSE-on-gen7-s-varying-index-pull-constant.patch | 84 ++ debian/patches/0008-i965-fs-Clean-up-the-setup-of-gen4-simd16-message-de.patch | 36 + debian/patches/0009-i965-fs-Bake-regs_written-into-the-IR-instead-of-rec.patch | 293 ++++++++++ debian/patches/0010-i965-fs-Don-t-double-emit-SEND-dependency-workaround.patch | 37 + debian/patches/0011-i965-fs-Use-LD-messages-for-pre-gen7-varying-index-u.patch | 258 ++++++++ debian/patches/0012-i965-fs-Allow-CSE-on-pre-gen7-varying-index-uniform-.patch | 37 + debian/patches/i965-add-a-bit-more-instruction-dumping.diff | 57 + debian/patches/i965-also-do-the-gen4-send-dependency.diff | 58 + debian/patches/i965-enable-cse-on-uniform-pull-constant-loads.diff | 35 + debian/patches/i965-fix-broken-rendering-in-large-shaders.diff | 27 debian/patches/i965-specialize-surface_state-creation.diff | 127 ++++ debian/patches/i965-switch-to-using-sampler-ld-messages.diff | 209 +++++++ debian/patches/series | 20 20 files changed, 1862 insertions(+)
New commits: commit a6f3122dcf667b178a73c0db6eabc14089f4e334 Author: Timo Aaltonen <tjaal...@ubuntu.com> Date: Mon Apr 1 09:06:24 2013 +0300 Added a bunch of patches to fix slow blur on intel. diff --git a/debian/changelog b/debian/changelog index 6e7686f..26a916b 100644 --- a/debian/changelog +++ b/debian/changelog @@ -3,6 +3,7 @@ mesa (9.1.1-0ubuntu1) UNRELEASED; urgency=low [ Timo Aaltonen ] * Merge from unreleased debian git - new upstream release (LP: #1112147) + * Added a bunch of patches to fix slow blur on intel. -- Timo Aaltonen <tjaal...@ubuntu.com> Wed, 27 Feb 2013 11:12:43 +0200 diff --git a/debian/patches/0001-i965-fs-Remove-creation-of-a-MOV-instruction-that-s-.patch b/debian/patches/0001-i965-fs-Remove-creation-of-a-MOV-instruction-that-s-.patch new file mode 100644 index 0000000..3368dff --- /dev/null +++ b/debian/patches/0001-i965-fs-Remove-creation-of-a-MOV-instruction-that-s-.patch @@ -0,0 +1,29 @@ +From e3bb00afd8528eab84e57ce3004aa65358d8c34f Mon Sep 17 00:00:00 2001 +From: Eric Anholt <e...@anholt.net> +Date: Wed, 13 Mar 2013 12:17:25 -0700 +Subject: [PATCH 01/12] i965/fs: Remove creation of a MOV instruction that's + never used. + +We weren't inserting it into the list, so it did nothing. This line was +replaced by the MOV/MUL block above. + +NOTE: This is a candidate for the 9.1 branch. 
+--- + src/mesa/drivers/dri/i965/brw_fs.cpp | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp +index 5a5bfeb..2fb8989 100644 +--- a/src/mesa/drivers/dri/i965/brw_fs.cpp ++++ b/src/mesa/drivers/dri/i965/brw_fs.cpp +@@ -253,7 +253,6 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(fs_reg dst, fs_reg surf_index, + } else { + instructions.push_tail(MUL(mrf, offset, fs_reg(4))); + } +- inst = MOV(mrf, offset); + inst = new(mem_ctx) fs_inst(FS_OPCODE_VARYING_PULL_CONSTANT_LOAD, + dst, surf_index); + inst->header_present = header_present; +-- +1.8.1.2 + diff --git a/debian/patches/0002-i965-fs-Move-varying-uniform-offset-compuation-into-.patch b/debian/patches/0002-i965-fs-Move-varying-uniform-offset-compuation-into-.patch new file mode 100644 index 0000000..e706bb1 --- /dev/null +++ b/debian/patches/0002-i965-fs-Move-varying-uniform-offset-compuation-into-.patch @@ -0,0 +1,90 @@ +From 88a5caea52f56aab5641fddfd23732cb3ecfaf13 Mon Sep 17 00:00:00 2001 +From: Eric Anholt <e...@anholt.net> +Date: Wed, 13 Mar 2013 12:27:17 -0700 +Subject: [PATCH 02/12] i965/fs: Move varying uniform offset compuation into + the helper func. + +I'm going to want to change the math for gen7 using sampler LD +instructions in a way that gets CSE to occur like we'd hope. + +NOTE: This is a candidate for the 9.1 branch. 
+--- + src/mesa/drivers/dri/i965/brw_fs.cpp | 16 +++++++++------- + src/mesa/drivers/dri/i965/brw_fs.h | 3 ++- + src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 5 ++--- + 3 files changed, 13 insertions(+), 11 deletions(-) + +diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp +index 2fb8989..89b08e8 100644 +--- a/src/mesa/drivers/dri/i965/brw_fs.cpp ++++ b/src/mesa/drivers/dri/i965/brw_fs.cpp +@@ -229,11 +229,15 @@ fs_visitor::CMP(fs_reg dst, fs_reg src0, fs_reg src1, uint32_t condition) + + exec_list + fs_visitor::VARYING_PULL_CONSTANT_LOAD(fs_reg dst, fs_reg surf_index, +- fs_reg offset) ++ fs_reg varying_offset, ++ uint32_t const_offset) + { + exec_list instructions; + fs_inst *inst; + ++ fs_reg offset = fs_reg(this, glsl_type::uint_type); ++ instructions.push_tail(ADD(offset, varying_offset, fs_reg(const_offset))); ++ + if (intel->gen >= 7) { + inst = new(mem_ctx) fs_inst(FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7, + dst, surf_index, offset); +@@ -1625,15 +1629,13 @@ fs_visitor::move_uniform_array_access_to_pull_constants() + base_ir = inst->ir; + current_annotation = inst->annotation; + +- fs_reg offset = fs_reg(this, glsl_type::int_type); +- inst->insert_before(ADD(offset, *inst->src[i].reladdr, +- fs_reg(pull_constant_loc[uniform] + +- inst->src[i].reg_offset))); +- + fs_reg surf_index = fs_reg((unsigned)SURF_INDEX_FRAG_CONST_BUFFER); + fs_reg temp = fs_reg(this, glsl_type::float_type); + exec_list list = VARYING_PULL_CONSTANT_LOAD(temp, +- surf_index, offset); ++ surf_index, ++ *inst->src[i].reladdr, ++ pull_constant_loc[uniform] + ++ inst->src[i].reg_offset); + inst->insert_before(&list); + + inst->src[i].file = temp.file; +diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h +index 254a534..76130b1 100644 +--- a/src/mesa/drivers/dri/i965/brw_fs.h ++++ b/src/mesa/drivers/dri/i965/brw_fs.h +@@ -294,7 +294,8 @@ public: + fs_reg reg); + + exec_list VARYING_PULL_CONSTANT_LOAD(fs_reg dst, fs_reg 
surf_index, +- fs_reg offset); ++ fs_reg varying_offset, ++ uint32_t const_offset); + + bool run(); + void setup_payload_gen4(); +diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +index 735a33d..6b6af8d 100644 +--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp ++++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +@@ -650,9 +650,8 @@ fs_visitor::visit(ir_expression *ir) + emit(SHR(base_offset, op[1], fs_reg(2))); + + for (int i = 0; i < ir->type->vector_elements; i++) { +- fs_reg offset = fs_reg(this, glsl_type::int_type); +- emit(ADD(offset, base_offset, fs_reg(i))); +- emit(VARYING_PULL_CONSTANT_LOAD(result, surf_index, offset)); ++ emit(VARYING_PULL_CONSTANT_LOAD(result, surf_index, ++ base_offset, i)); + + if (ir->type->base_type == GLSL_TYPE_BOOL) + emit(CMP(result, result, fs_reg(0), BRW_CONDITIONAL_NZ)); +-- +1.8.1.2 + diff --git a/debian/patches/0003-i965-Make-the-constant-surface-interface-take-a-norm.patch b/debian/patches/0003-i965-Make-the-constant-surface-interface-take-a-norm.patch new file mode 100644 index 0000000..17d834e --- /dev/null +++ b/debian/patches/0003-i965-Make-the-constant-surface-interface-take-a-norm.patch @@ -0,0 +1,141 @@ +From 406b0516036273010399ac7a520a765de66df610 Mon Sep 17 00:00:00 2001 +From: Eric Anholt <e...@anholt.net> +Date: Wed, 20 Mar 2013 10:46:20 -0700 +Subject: [PATCH 03/12] i965: Make the constant surface interface take a normal + byte size. + +This puts the rounding-up logic into the function itself instead of all +the callers having to manage it. Also drop an "unused" comment in gen4, +as the stride *is* used for texbos (and will be for uniforms soon). + +NOTE: This is a candidate for the 9.1 branch. 
+--- + src/mesa/drivers/dri/i965/brw_vs_surface_state.c | 7 +++---- + src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 16 +++++++--------- + src/mesa/drivers/dri/i965/gen7_wm_surface_state.c | 8 +++++--- + src/mesa/drivers/dri/intel/intel_context.h | 2 +- + 4 files changed, 16 insertions(+), 17 deletions(-) + +diff --git a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c +index 2aefc0c..6c0b690 100644 +--- a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c ++++ b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c +@@ -68,9 +68,9 @@ brw_upload_vs_pull_constants(struct brw_context *brw) + + /* _NEW_PROGRAM_CONSTANTS */ + drm_intel_bo_unreference(brw->vs.const_bo); ++ uint32_t size = brw->vs.prog_data->nr_pull_params * 4; + brw->vs.const_bo = drm_intel_bo_alloc(intel->bufmgr, "vp_const_buffer", +- brw->vs.prog_data->nr_pull_params * 4, +- 64); ++ size, 64); + + drm_intel_gem_bo_map_gtt(brw->vs.const_bo); + for (i = 0; i < brw->vs.prog_data->nr_pull_params; i++) { +@@ -90,8 +90,7 @@ brw_upload_vs_pull_constants(struct brw_context *brw) + drm_intel_gem_bo_unmap_gtt(brw->vs.const_bo); + + const int surf = SURF_INDEX_VERT_CONST_BUFFER; +- intel->vtbl.create_constant_surface(brw, brw->vs.const_bo, 0, +- ALIGN(brw->vs.prog_data->nr_pull_params, 4) / 4, ++ intel->vtbl.create_constant_surface(brw, brw->vs.const_bo, 0, size, + &brw->vs.surf_offset[surf]); + + brw->state.dirty.brw |= BRW_NEW_VS_CONSTBUF; +diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +index 932e472..98eed15 100644 +--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c ++++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +@@ -916,11 +916,13 @@ void + brw_create_constant_surface(struct brw_context *brw, + drm_intel_bo *bo, + uint32_t offset, +- int width, ++ uint32_t size, + uint32_t *out_offset) + { + struct intel_context *intel = &brw->intel; +- const GLint w = width - 1; ++ uint32_t stride 
= 16; ++ uint32_t elements = ALIGN(size, stride) / stride; ++ const GLint w = elements - 1; + uint32_t *surf; + + surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, +@@ -939,7 +941,7 @@ brw_create_constant_surface(struct brw_context *brw, + ((w >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT); + + surf[3] = (((w >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT | +- (16 - 1) << BRW_SURFACE_PITCH_SHIFT); /* ignored */ ++ (stride - 1) << BRW_SURFACE_PITCH_SHIFT); + + surf[4] = 0; + surf[5] = 0; +@@ -1086,8 +1088,7 @@ brw_upload_wm_pull_constants(struct brw_context *brw) + } + drm_intel_gem_bo_unmap_gtt(brw->wm.const_bo); + +- intel->vtbl.create_constant_surface(brw, brw->wm.const_bo, 0, +- ALIGN(brw->wm.prog_data->nr_pull_params, 4) / 4, ++ intel->vtbl.create_constant_surface(brw, brw->wm.const_bo, 0, size, + &brw->wm.surf_offset[surf_index]); + + brw->state.dirty.brw |= BRW_NEW_SURFACES; +@@ -1439,11 +1440,8 @@ brw_upload_ubo_surfaces(struct brw_context *brw, + * glBindBufferRange case is undefined, we can just bind the whole buffer + * glBindBufferBase wants and be a correct implementation. 
+ */ +- int size = bo->size - binding->Offset; +- size = ALIGN(size, 16) / 16; /* The interface takes a number of vec4s */ +- + intel->vtbl.create_constant_surface(brw, bo, binding->Offset, +- size, ++ bo->size - binding->Offset, + &surf_offsets[i]); + } + +diff --git a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c +index db04253..484afcd 100644 +--- a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c ++++ b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c +@@ -383,11 +383,13 @@ static void + gen7_create_constant_surface(struct brw_context *brw, + drm_intel_bo *bo, + uint32_t offset, +- int width, ++ uint32_t size, + uint32_t *out_offset) + { + struct intel_context *intel = &brw->intel; +- const GLint w = width - 1; ++ uint32_t stride = 16; ++ uint32_t elements = ALIGN(size, stride) / stride; ++ const GLint w = elements - 1; + + uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, + 8 * 4, 32, out_offset); +@@ -403,7 +405,7 @@ gen7_create_constant_surface(struct brw_context *brw, + surf[2] = SET_FIELD(w & 0x7f, GEN7_SURFACE_WIDTH) | + SET_FIELD((w >> 7) & 0x1fff, GEN7_SURFACE_HEIGHT); + surf[3] = SET_FIELD((w >> 20) & 0x7f, BRW_SURFACE_DEPTH) | +- (16 - 1); /* stride between samples */ ++ (stride - 1); + + if (intel->is_haswell) { + surf[7] = SET_FIELD(HSW_SCS_RED, GEN7_SURFACE_SCS_R) | +diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h +index 2df15d4..bb21f55 100644 +--- a/src/mesa/drivers/dri/intel/intel_context.h ++++ b/src/mesa/drivers/dri/intel/intel_context.h +@@ -202,7 +202,7 @@ struct intel_context + void (*create_constant_surface)(struct brw_context *brw, + drm_intel_bo *bo, + uint32_t offset, +- int width, ++ uint32_t size, + uint32_t *out_offset); + /** \} */ + } vtbl; +-- +1.8.1.2 + diff --git a/debian/patches/0004-i965-Make-the-fragment-shader-pull-constants-index-b.patch 
b/debian/patches/0004-i965-Make-the-fragment-shader-pull-constants-index-b.patch new file mode 100644 index 0000000..9bbcd0c --- /dev/null +++ b/debian/patches/0004-i965-Make-the-fragment-shader-pull-constants-index-b.patch @@ -0,0 +1,162 @@ +From 4921232ba622d327f238731874c36a288e605515 Mon Sep 17 00:00:00 2001 +From: Eric Anholt <e...@anholt.net> +Date: Thu, 14 Mar 2013 14:41:37 -0700 +Subject: [PATCH 04/12] i965: Make the fragment shader pull constants index by + dwords, not vec4s. + +We want to load vec4s, since loading a vec4 instead of a dword is +basically no increased latency. But for variable indexed access, the +previous requirement of aligned vec4s for a sampler LD was hard to +implement. + +Note that this change only affects those messages that use the surface +format, like sampler LDs, but not to the untyped data cache loads we've +used in other cases. + +No significant performance difference on my GLSL demo with uniforms forced +to take the varying pull constants path (n=4). + +NOTE: This is a candidate for the 9.1 branch. +--- + src/mesa/drivers/dri/i965/brw_fs.cpp | 5 ++++- + src/mesa/drivers/dri/i965/brw_state.h | 5 ----- + src/mesa/drivers/dri/i965/brw_vs_surface_state.c | 2 +- + src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 13 ++++++++----- + src/mesa/drivers/dri/i965/gen7_wm_surface_state.c | 5 +++-- + src/mesa/drivers/dri/intel/intel_context.h | 5 +++-- + 6 files changed, 19 insertions(+), 16 deletions(-) + +diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp +index 89b08e8..fbe9e3a 100644 +--- a/src/mesa/drivers/dri/i965/brw_fs.cpp ++++ b/src/mesa/drivers/dri/i965/brw_fs.cpp +@@ -2483,10 +2483,13 @@ fs_visitor::lower_uniform_pull_constant_loads() + continue; + + if (intel->gen >= 7) { ++ /* The offset arg before was a vec4-aligned byte offset. We need to ++ * turn it into a dword offset. 
++ */ + fs_reg const_offset_reg = inst->src[1]; + assert(const_offset_reg.file == IMM && + const_offset_reg.type == BRW_REGISTER_TYPE_UD); +- const_offset_reg.imm.u /= 16; ++ const_offset_reg.imm.u /= 4; + fs_reg payload = fs_reg(this, glsl_type::uint_type); + + /* This is actually going to be a MOV, but since only the first dword +diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h +index 02ce57b..29ec276 100644 +--- a/src/mesa/drivers/dri/i965/brw_state.h ++++ b/src/mesa/drivers/dri/i965/brw_state.h +@@ -187,11 +187,6 @@ void *brw_state_batch(struct brw_context *brw, + void gen4_init_vtable_surface_functions(struct brw_context *brw); + uint32_t brw_get_surface_tiling_bits(uint32_t tiling); + uint32_t brw_get_surface_num_multisamples(unsigned num_samples); +-void brw_create_constant_surface(struct brw_context *brw, +- drm_intel_bo *bo, +- uint32_t offset, +- int width, +- uint32_t *out_offset); + + uint32_t brw_format_for_mesa_format(gl_format mesa_format); + +diff --git a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c +index 6c0b690..675a84c 100644 +--- a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c ++++ b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c +@@ -91,7 +91,7 @@ brw_upload_vs_pull_constants(struct brw_context *brw) + + const int surf = SURF_INDEX_VERT_CONST_BUFFER; + intel->vtbl.create_constant_surface(brw, brw->vs.const_bo, 0, size, +- &brw->vs.surf_offset[surf]); ++ &brw->vs.surf_offset[surf], false); + + brw->state.dirty.brw |= BRW_NEW_VS_CONSTBUF; + } +diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +index 98eed15..506ddf0 100644 +--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c ++++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +@@ -912,15 +912,16 @@ brw_update_texture_surface(struct gl_context *ctx, + * Create the constant buffer surface. 
Vertex/fragment shader constants will be + * read from this buffer with Data Port Read instructions/messages. + */ +-void ++static void + brw_create_constant_surface(struct brw_context *brw, + drm_intel_bo *bo, + uint32_t offset, + uint32_t size, +- uint32_t *out_offset) ++ uint32_t *out_offset, ++ bool dword_pitch) + { + struct intel_context *intel = &brw->intel; +- uint32_t stride = 16; ++ uint32_t stride = dword_pitch ? 4 : 16; + uint32_t elements = ALIGN(size, stride) / stride; + const GLint w = elements - 1; + uint32_t *surf; +@@ -1089,7 +1090,8 @@ brw_upload_wm_pull_constants(struct brw_context *brw) + drm_intel_gem_bo_unmap_gtt(brw->wm.const_bo); + + intel->vtbl.create_constant_surface(brw, brw->wm.const_bo, 0, size, +- &brw->wm.surf_offset[surf_index]); ++ &brw->wm.surf_offset[surf_index], ++ true); + + brw->state.dirty.brw |= BRW_NEW_SURFACES; + } +@@ -1442,7 +1444,8 @@ brw_upload_ubo_surfaces(struct brw_context *brw, + */ + intel->vtbl.create_constant_surface(brw, bo, binding->Offset, + bo->size - binding->Offset, +- &surf_offsets[i]); ++ &surf_offsets[i], ++ shader->Type == GL_FRAGMENT_SHADER); + } + + if (shader->NumUniformBlocks) +diff --git a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c +index 484afcd..2c12be3 100644 +--- a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c ++++ b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c +@@ -384,10 +384,11 @@ gen7_create_constant_surface(struct brw_context *brw, + drm_intel_bo *bo, + uint32_t offset, + uint32_t size, +- uint32_t *out_offset) ++ uint32_t *out_offset, ++ bool dword_pitch) + { + struct intel_context *intel = &brw->intel; +- uint32_t stride = 16; ++ uint32_t stride = dword_pitch ? 
4 : 16; + uint32_t elements = ALIGN(size, stride) / stride; + const GLint w = elements - 1; + +diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h +index bb21f55..7bec10f 100644 +--- a/src/mesa/drivers/dri/intel/intel_context.h ++++ b/src/mesa/drivers/dri/intel/intel_context.h +@@ -203,13 +203,14 @@ struct intel_context + drm_intel_bo *bo, + uint32_t offset, + uint32_t size, +- uint32_t *out_offset); ++ uint32_t *out_offset, ++ bool dword_pitch); + /** \} */ + } vtbl; + + GLbitfield Fallback; /**< mask of INTEL_FALLBACK_x bits */ + GLuint NewGLState; +- ++ + dri_bufmgr *bufmgr; + unsigned int maxBatchSize; + +-- +1.8.1.2 + diff --git a/debian/patches/0005-i965-fs-Avoid-inappropriate-optimization-with-regs_w.patch b/debian/patches/0005-i965-fs-Avoid-inappropriate-optimization-with-regs_w.patch new file mode 100644 index 0000000..5e1a22d --- /dev/null +++ b/debian/patches/0005-i965-fs-Avoid-inappropriate-optimization-with-regs_w.patch @@ -0,0 +1,34 @@ +From 0eb070fa7d4a7d4494c70407fc953adc9429edeb Mon Sep 17 00:00:00 2001 +From: Eric Anholt <e...@anholt.net> +Date: Fri, 15 Mar 2013 14:31:46 -0700 +Subject: [PATCH 05/12] i965/fs: Avoid inappropriate optimization with + regs_written > 1. + +Right now we don't have anything with regs_written() > 1 and !inst->mlen, +but that's about to change. + +NOTE: This is a candidate for the 9.1 branch. +--- + src/mesa/drivers/dri/i965/brw_fs.cpp | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp +index fbe9e3a..f1b0789 100644 +--- a/src/mesa/drivers/dri/i965/brw_fs.cpp ++++ b/src/mesa/drivers/dri/i965/brw_fs.cpp +@@ -2087,6 +2087,12 @@ fs_visitor::compute_to_mrf() + break; + } + ++ /* Things returning more than one register would need us to ++ * understand coalescing out more than one MOV at a time. 
++ */ ++ if (scan_inst->regs_written() > 1) ++ break; ++ + /* SEND instructions can't have MRF as a destination. */ + if (scan_inst->mlen) + break; +-- +1.8.1.2 + diff --git a/debian/patches/0006-i965-fs-Improve-performance-of-varying-index-uniform.patch b/debian/patches/0006-i965-fs-Improve-performance-of-varying-index-uniform.patch new file mode 100644 index 0000000..131c379 --- /dev/null +++ b/debian/patches/0006-i965-fs-Improve-performance-of-varying-index-uniform.patch @@ -0,0 +1,127 @@ +From 2f4d09235849e206e2807146bb8c8e724ab6ff26 Mon Sep 17 00:00:00 2001 +From: Eric Anholt <e...@anholt.net> +Date: Wed, 13 Mar 2013 14:48:55 -0700 +Subject: [PATCH 06/12] i965/fs: Improve performance of varying-index uniform + loads on IVB. + +Like we have done for the VS and for constant-index uniform loads, we use +the sampler engine to get caching in front of the L3 to avoid tickling the +IVB L3 bug. This is also a bit of a functional change, as we're now +loading a vec4 instead of a single dword, though we're not taking +advantage of the other 3 components of the vec4 (yet). + +With the driver hacked to always take the varying-index path for all +uniforms, improves performance of my old GLSL demo by 315% +/- 2% (n=4). +This a major fix for some blur shaders in compositors from the +varying-index uniforms support I introduced in 9.1. + +v2: Move old offset computation into the pre-gen7 path. + +Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=61554 +NOTE: This is a candidate for the 9.1 branch. 
+--- + src/mesa/drivers/dri/i965/brw_fs.cpp | 29 ++++++++++++++++++++++++----- + src/mesa/drivers/dri/i965/brw_fs_emit.cpp | 27 ++++++++++++++------------- + 2 files changed, 38 insertions(+), 18 deletions(-) + +diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp +index f1b0789..f4aa9f7 100644 +--- a/src/mesa/drivers/dri/i965/brw_fs.cpp ++++ b/src/mesa/drivers/dri/i965/brw_fs.cpp +@@ -235,14 +235,33 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(fs_reg dst, fs_reg surf_index, + exec_list instructions; + fs_inst *inst; + +- fs_reg offset = fs_reg(this, glsl_type::uint_type); +- instructions.push_tail(ADD(offset, varying_offset, fs_reg(const_offset))); +- + if (intel->gen >= 7) { ++ /* We have our constant surface use a pitch of 4 bytes, so our index can ++ * be any component of a vector, and then we load 4 contiguous ++ * components starting from that. ++ * ++ * We break down the const_offset to a portion added to the variable ++ * offset and a portion done using reg_offset, which means that if you ++ * have GLSL using something like "uniform vec4 a[20]; gl_FragColor = ++ * a[i]", we'll temporarily generate 4 vec4 loads from offset i * 4, and ++ * CSE can later notice that those loads are all the same and eliminate ++ * the redundant ones. 
++ */ ++ fs_reg vec4_offset = fs_reg(this, glsl_type::int_type); ++ instructions.push_tail(ADD(vec4_offset, ++ varying_offset, const_offset & ~3)); ++ ++ fs_reg vec4_result = fs_reg(GRF, virtual_grf_alloc(4), dst.type); + inst = new(mem_ctx) fs_inst(FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7, +- dst, surf_index, offset); ++ vec4_result, surf_index, vec4_offset); + instructions.push_tail(inst); ++ ++ vec4_result.reg_offset += const_offset & 3; ++ instructions.push_tail(MOV(dst, vec4_result)); + } else { ++ fs_reg offset = fs_reg(this, glsl_type::uint_type); ++ instructions.push_tail(ADD(offset, varying_offset, fs_reg(const_offset))); ++ + int base_mrf = 13; + bool header_present = true; + +@@ -313,7 +332,7 @@ fs_inst::equals(fs_inst *inst) + int + fs_inst::regs_written() + { +- if (is_tex()) ++ if (is_tex() || opcode == FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7) + return 4; + + /* The SINCOS and INT_DIV_QUOTIENT_AND_REMAINDER math functions return 2, +diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp +index 712fef6..4b3c43f 100644 +--- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp ++++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp +@@ -737,28 +737,29 @@ fs_generator::generate_varying_pull_constant_load_gen7(fs_inst *inst, + index.type == BRW_REGISTER_TYPE_UD); + uint32_t surf_index = index.dw1.ud; + +- uint32_t msg_control, rlen, mlen; ++ uint32_t simd_mode, rlen, mlen; + if (dispatch_width == 16) { +- msg_control = BRW_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS; +- mlen = rlen = 2; ++ mlen = 2; ++ rlen = 8; ++ simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16; + } else { +- msg_control = BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS; +- mlen = rlen = 1; ++ mlen = 1; ++ rlen = 4; ++ simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8; + } + + struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND); + brw_set_dest(p, send, dst); + brw_set_src0(p, send, offset); +- if (intel->gen < 6) +- send->header.destreg__conditionalmod = inst->base_mrf; 
+- brw_set_dp_read_message(p, send, ++ brw_set_sampler_message(p, send, + surf_index, +- msg_control, +- GEN7_DATAPORT_DC_DWORD_SCATTERED_READ, +- BRW_DATAPORT_READ_TARGET_DATA_CACHE, ++ 0, /* LD message ignores sampler unit */ ++ GEN5_SAMPLER_MESSAGE_SAMPLE_LD, ++ rlen, + mlen, +- inst->header_present, +- rlen); ++ false, /* no header */ ++ simd_mode, ++ 0); + } + + /** +-- +1.8.1.2 + diff --git a/debian/patches/0007-i965-fs-Do-CSE-on-gen7-s-varying-index-pull-constant.patch b/debian/patches/0007-i965-fs-Do-CSE-on-gen7-s-varying-index-pull-constant.patch new file mode 100644 index 0000000..b5cfe0e --- /dev/null +++ b/debian/patches/0007-i965-fs-Do-CSE-on-gen7-s-varying-index-pull-constant.patch @@ -0,0 +1,84 @@ +From b5f8ad54c7bfd624209e4ae7d36abac0093ddb9a Mon Sep 17 00:00:00 2001 +From: Eric Anholt <e...@anholt.net> +Date: Fri, 15 Mar 2013 14:43:28 -0700 +Subject: [PATCH 07/12] i965/fs: Do CSE on gen7's varying-index pull constant + loads. + +This is our first CSE on a regs_written() > 1 instruction, so it takes a +bit of extra fixup. Reduces the number of loads on kwin's Lanczos shader +from 12 to 2. + +Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=61554 +NOTE: This is a candidate for the 9.1 branch. 
+--- + src/mesa/drivers/dri/i965/brw_fs_cse.cpp | 43 ++++++++++++++++++++++++-------- + 1 file changed, 32 insertions(+), 11 deletions(-) + +diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp +index 02642c9..c89da36 100644 +--- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp ++++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp +@@ -68,6 +68,7 @@ is_expression(const fs_inst *const inst) + case BRW_OPCODE_MAD: + case BRW_OPCODE_LRP: + case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD: ++ case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7: + case FS_OPCODE_CINTERP: + case FS_OPCODE_LINTERP: + return true; +@@ -129,21 +130,41 @@ fs_visitor::opt_cse_local(bblock_t *block, exec_list *aeb) + */ + bool no_existing_temp = entry->tmp.file == BAD_FILE; + if (no_existing_temp) { +- entry->tmp = fs_reg(this, glsl_type::float_type); +- entry->tmp.type = inst->dst.type; +- +- fs_inst *copy = new(ralloc_parent(inst)) +- fs_inst(BRW_OPCODE_MOV, entry->generator->dst, entry->tmp); +- entry->generator->insert_after(copy); +- entry->generator->dst = entry->tmp; ++ int written = entry->generator->regs_written(); ++ ++ fs_reg orig_dst = entry->generator->dst; ++ fs_reg tmp = fs_reg(GRF, virtual_grf_alloc(written), ++ orig_dst.type); ++ entry->tmp = tmp; ++ entry->generator->dst = tmp; ++ ++ for (int i = 0; i < written; i++) { ++ fs_inst *copy = MOV(orig_dst, tmp); ++ copy->force_writemask_all = ++ entry->generator->force_writemask_all; ++ entry->generator->insert_after(copy); ++ ++ orig_dst.reg_offset++; ++ tmp.reg_offset++; ++ } + } + + /* dest <- temp */ ++ int written = inst->regs_written(); ++ assert(written == entry->generator->regs_written()); + assert(inst->dst.type == entry->tmp.type); +- fs_inst *copy = new(ralloc_parent(inst)) +- fs_inst(BRW_OPCODE_MOV, inst->dst, entry->tmp); +- copy->force_writemask_all = inst->force_writemask_all; +- inst->replace_with(copy); ++ fs_reg dst = inst->dst; ++ fs_reg tmp = entry->tmp; ++ fs_inst *copy; ++ for (int i = 0; 
i < written; i++) { ++ copy = MOV(dst, tmp); ++ copy->force_writemask_all = inst->force_writemask_all; ++ inst->insert_before(copy); ++ ++ dst.reg_offset++; ++ tmp.reg_offset++; ++ } ++ inst->remove(); + + /* Appending an instruction may have changed our bblock end. */ + if (inst == block->end) { +-- +1.8.1.2 + diff --git a/debian/patches/0008-i965-fs-Clean-up-the-setup-of-gen4-simd16-message-de.patch b/debian/patches/0008-i965-fs-Clean-up-the-setup-of-gen4-simd16-message-de.patch new file mode 100644 index 0000000..4827a9c --- /dev/null +++ b/debian/patches/0008-i965-fs-Clean-up-the-setup-of-gen4-simd16-message-de.patch @@ -0,0 +1,36 @@ +From 2a0a69e2169eee805b6068f930c3b3049b362a91 Mon Sep 17 00:00:00 2001 +From: Eric Anholt <e...@anholt.net> +Date: Mon, 18 Mar 2013 11:26:17 -0700 +Subject: [PATCH 08/12] i965/fs: Clean up the setup of gen4 simd16 message + destinations. + +I think this makes it much more obvious what's going on here. + +NOTE: This is a candidate for the 9.1 branch. +--- + src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 9 ++++----- + 1 file changed, 4 insertions(+), 5 deletions(-) + +diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +index 6b6af8d..48c6df3 100644 +--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp ++++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +@@ -916,11 +916,10 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate, + * this weirdness around to the expected layout. + */ + orig_dst = dst; +- const glsl_type *vec_type = +- glsl_type::get_instance(ir->type->base_type, 4, 1); +- dst = fs_reg(this, glsl_type::get_array_instance(vec_type, 2)); +- dst.type = intel->is_g4x ? brw_type_for_base_type(ir->type) +- : BRW_REGISTER_TYPE_F; ++ dst = fs_reg(GRF, virtual_grf_alloc(8), ++ (intel->is_g4x ? 
++ brw_type_for_base_type(ir->type) : ++ BRW_REGISTER_TYPE_F)); + } + + fs_inst *inst = NULL; +-- +1.8.1.2 + diff --git a/debian/patches/0009-i965-fs-Bake-regs_written-into-the-IR-instead-of-rec.patch b/debian/patches/0009-i965-fs-Bake-regs_written-into-the-IR-instead-of-rec.patch new file mode 100644 index 0000000..3a430d1 --- /dev/null +++ b/debian/patches/0009-i965-fs-Bake-regs_written-into-the-IR-instead-of-rec.patch @@ -0,0 +1,293 @@ +From bb1d21826152370209fd64b9abffd8a59d3ec5f4 Mon Sep 17 00:00:00 2001 +From: Eric Anholt <e...@anholt.net> +Date: Mon, 18 Mar 2013 11:30:57 -0700 +Subject: [PATCH 09/12] i965/fs: Bake regs_written into the IR instead of + recomputing it later. + +For sampler messages, it depends on the target gen, and on gen4 +SIMD16-sampler-on-SIMD8-execution we were returning 4 instead of 8 like we +should. + +NOTE: This is a candidate for the 9.1 branch. +--- + src/mesa/drivers/dri/i965/brw_fs.cpp | 29 ++++++++-------------- + src/mesa/drivers/dri/i965/brw_fs.h | 2 +- + src/mesa/drivers/dri/i965/brw_fs_cse.cpp | 6 ++--- + .../drivers/dri/i965/brw_fs_live_variables.cpp | 2 +- + src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp | 8 +++--- + .../dri/i965/brw_fs_schedule_instructions.cpp | 6 ++--- + src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 7 ++++-- + 7 files changed, 27 insertions(+), 33 deletions(-) + +diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp +index f4aa9f7..c128175 100644 +--- a/src/mesa/drivers/dri/i965/brw_fs.cpp ++++ b/src/mesa/drivers/dri/i965/brw_fs.cpp +@@ -60,6 +60,9 @@ fs_inst::init() + this->src[0] = reg_undef; + this->src[1] = reg_undef; + this->src[2] = reg_undef; ++ ++ /* This will be the case for almost all instructions. 
*/ ++ this->regs_written = 1; + } + + fs_inst::fs_inst() +@@ -254,6 +257,7 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(fs_reg dst, fs_reg surf_index, + fs_reg vec4_result = fs_reg(GRF, virtual_grf_alloc(4), dst.type); + inst = new(mem_ctx) fs_inst(FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7, + vec4_result, surf_index, vec4_offset); ++ inst->regs_written = 4; + instructions.push_tail(inst); + + vec4_result.reg_offset += const_offset & 3; +@@ -329,26 +333,13 @@ fs_inst::equals(fs_inst *inst) + offset == inst->offset); + } + +-int +-fs_inst::regs_written() +-{ +- if (is_tex() || opcode == FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7) +- return 4; +- +- /* The SINCOS and INT_DIV_QUOTIENT_AND_REMAINDER math functions return 2, +- * but we don't currently use them...nor do we have an opcode for them. +- */ +- +- return 1; +-} +- + bool + fs_inst::overwrites_reg(const fs_reg &reg) + { + return (reg.file == dst.file && + reg.reg == dst.reg && + reg.reg_offset >= dst.reg_offset && +- reg.reg_offset < dst.reg_offset + regs_written()); ++ reg.reg_offset < dst.reg_offset + regs_written); + } + + bool +@@ -1388,7 +1379,7 @@ fs_visitor::split_virtual_grfs() + /* If there's a SEND message that requires contiguous destination + * registers, no splitting is allowed. + */ +- if (inst->regs_written() > 1) { ++ if (inst->regs_written > 1) { + split_grf[inst->dst.reg] = false; + } + } +@@ -2109,7 +2100,7 @@ fs_visitor::compute_to_mrf() + /* Things returning more than one register would need us to + * understand coalescing out more than one MOV at a time. + */ +- if (scan_inst->regs_written() > 1) ++ if (scan_inst->regs_written > 1) + break; + + /* SEND instructions can't have MRF as a destination. 
*/ +@@ -2326,7 +2317,7 @@ void + fs_visitor::insert_gen4_pre_send_dependency_workarounds(fs_inst *inst) + { + int reg_size = dispatch_width / 8; +- int write_len = inst->regs_written() * reg_size; ++ int write_len = inst->regs_written * reg_size; + int first_write_grf = inst->dst.reg; + bool needs_dep[BRW_MAX_MRF]; + assert(write_len < (int)sizeof(needs_dep) - 1); +@@ -2366,7 +2357,7 @@ fs_visitor::insert_gen4_pre_send_dependency_workarounds(fs_inst *inst) + * dependency has more latency than a MOV. + */ + if (scan_inst->dst.file == GRF) { +- for (int i = 0; i < scan_inst->regs_written(); i++) { ++ for (int i = 0; i < scan_inst->regs_written; i++) { + int reg = scan_inst->dst.reg + i * reg_size; + + if (reg >= first_write_grf && +@@ -2405,7 +2396,7 @@ fs_visitor::insert_gen4_pre_send_dependency_workarounds(fs_inst *inst) + void + fs_visitor::insert_gen4_post_send_dependency_workarounds(fs_inst *inst) + { +- int write_len = inst->regs_written() * dispatch_width / 8; ++ int write_len = inst->regs_written * dispatch_width / 8; + int first_write_grf = inst->dst.reg; + bool needs_dep[BRW_MAX_MRF]; + assert(write_len < (int)sizeof(needs_dep) - 1); +diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h +index 76130b1..0c5aad1 100644 +--- a/src/mesa/drivers/dri/i965/brw_fs.h ++++ b/src/mesa/drivers/dri/i965/brw_fs.h +@@ -174,7 +174,6 @@ public: + fs_reg src0, fs_reg src1,fs_reg src2); + + bool equals(fs_inst *inst); +- int regs_written(); + bool overwrites_reg(const fs_reg &reg); + bool is_tex(); + bool is_math(); +@@ -192,6 +191,7 @@ public: + uint8_t flag_subreg; + + int mlen; /**< SEND message length */ ++ int regs_written; /**< Number of vgrfs written by a SEND message, or 1 */ + int base_mrf; /**< First MRF in the SEND message, if mlen is nonzero. 
*/ + uint32_t texture_offset; /**< Texture offset bitfield */ + int sampler; +diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp +index c89da36..01a64d2 100644 +--- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp ++++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp +@@ -130,7 +130,7 @@ fs_visitor::opt_cse_local(bblock_t *block, exec_list *aeb) + */ + bool no_existing_temp = entry->tmp.file == BAD_FILE; + if (no_existing_temp) { +- int written = entry->generator->regs_written(); ++ int written = entry->generator->regs_written; + + fs_reg orig_dst = entry->generator->dst; + fs_reg tmp = fs_reg(GRF, virtual_grf_alloc(written), +@@ -150,8 +150,8 @@ fs_visitor::opt_cse_local(bblock_t *block, exec_list *aeb) + } + + /* dest <- temp */ +- int written = inst->regs_written(); +- assert(written == entry->generator->regs_written()); ++ int written = inst->regs_written; ++ assert(written == entry->generator->regs_written); + assert(inst->dst.type == entry->tmp.type); + fs_reg dst = inst->dst; + fs_reg tmp = entry->tmp; +diff --git a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp +index 63af148..373aa2d 100644 +--- a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp ++++ b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp +@@ -77,7 +77,7 @@ fs_live_variables::setup_def_use() + * variable, and thus qualify for being in def[]. 
+ */ + if (inst->dst.file == GRF && +- inst->regs_written() == v->virtual_grf_sizes[inst->dst.reg] && ++ inst->regs_written == v->virtual_grf_sizes[inst->dst.reg] && + !inst->predicate && + !inst->force_uncompressed && + !inst->force_sechalf) { +diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp +index b8936dc..4ee7bbc 100644 +--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp ++++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp +@@ -553,7 +553,7 @@ fs_visitor::choose_spill_reg(struct ra_graph *g) + } + + if (inst->dst.file == GRF) { +- spill_costs[inst->dst.reg] += inst->regs_written() * loop_scale; ++ spill_costs[inst->dst.reg] += inst->regs_written * loop_scale; + + if (inst->dst.smear >= 0) { + no_spill[inst->dst.reg] = true; +@@ -622,7 +622,7 @@ fs_visitor::spill_reg(int spill_reg) + inst->dst.reg == spill_reg) { + int subset_spill_offset = (spill_offset + + REG_SIZE * inst->dst.reg_offset); +- inst->dst.reg = virtual_grf_alloc(inst->regs_written()); ++ inst->dst.reg = virtual_grf_alloc(inst->regs_written); + inst->dst.reg_offset = 0; + + /* If our write is going to affect just part of the +@@ -631,7 +631,7 @@ fs_visitor::spill_reg(int spill_reg) + */ + if (inst->predicate || inst->force_uncompressed || inst->force_sechalf) { + fs_reg unspill_reg = inst->dst; +- for (int chan = 0; chan < inst->regs_written(); chan++) { ++ for (int chan = 0; chan < inst->regs_written; chan++) { + emit_unspill(inst, unspill_reg, -- To UNSUBSCRIBE, email to debian-x-requ...@lists.debian.org with a subject of "unsubscribe". Trouble? Contact listmas...@lists.debian.org Archive: http://lists.debian.org/e1umxth-0007ir...@vasks.debian.org