Ugh, and I also forgot: you can remove brw_vs_unit from brw_state.h too.
On Thu, May 11, 2017 at 09:54:13AM -0700, Rafael Antognolli wrote: > On Wed, May 10, 2017 at 12:41:39PM -0700, Kenneth Graunke wrote: > > It's actually not that much code. > > --- > > src/intel/genxml/gen4.xml | 2 +- > > src/intel/genxml/gen45.xml | 2 +- > > src/intel/genxml/gen5.xml | 2 +- > > src/mesa/drivers/dri/i965/Makefile.sources | 1 - > > src/mesa/drivers/dri/i965/brw_vs_state.c | 192 > > -------------------------- > > src/mesa/drivers/dri/i965/genX_state_upload.c | 74 +++++++++- > > 6 files changed, 71 insertions(+), 202 deletions(-) > > delete mode 100644 src/mesa/drivers/dri/i965/brw_vs_state.c > > > > diff --git a/src/intel/genxml/gen4.xml b/src/intel/genxml/gen4.xml > > index f4d58879bcc..da34d6ca19b 100644 > > --- a/src/intel/genxml/gen4.xml > > +++ b/src/intel/genxml/gen4.xml > > @@ -750,13 +750,13 @@ > > <field name="URB Entry Allocation Size" start="147" end="151" > > type="uint"/> > > <field name="Number of URB Entries" start="139" end="146" type="uint"/> > > <field name="Statistics Enable" start="138" end="138" type="bool"/> > > <field name="Sampler State Offset" start="165" end="191" > > type="address"/> > > <field name="Sampler Count" start="160" end="162" type="uint"/> > > <field name="Vertex Cache Disable" start="193" end="193" type="bool"/> > > - <field name="Function Enable" start="192" end="192" type="bool"/> > > + <field name="Enable" start="192" end="192" type="bool"/> > > </struct> > > > > <struct name="WM_STATE" length="8"> > > <field name="Kernel Start Pointer" start="6" end="31" type="address"/> > > <field name="GRF Register Count" start="1" end="3" type="uint"/> > > <field name="Single Program Flow" start="63" end="63" type="bool"/> > > diff --git a/src/intel/genxml/gen45.xml b/src/intel/genxml/gen45.xml > > index 4556322fee5..0e95cf27e89 100644 > > --- a/src/intel/genxml/gen45.xml > > +++ b/src/intel/genxml/gen45.xml > > @@ -701,13 +701,13 @@ > > <field name="URB Entry Allocation Size" start="147" end="151" > > type="uint"/> > > <field name="Number of URB Entries" start="139" end="146" type="uint"/> > > <field name="Statistics Enable" start="138" end="138" type="bool"/> > > <field name="Sampler State Offset" start="165" end="191" > > type="address"/> > > <field name="Sampler Count" start="160" end="162" type="uint"/> > > <field name="Vertex Cache Disable" start="193" end="193" type="bool"/> > > - <field name="Function Enable" start="192" end="192" type="bool"/> > > + <field name="Enable" start="192" end="192" type="bool"/> > > </struct> > > > > <struct name="WM_STATE" length="8"> > > <field name="Kernel Start Pointer" start="6" end="31" type="address"/> > > <field name="GRF Register Count" start="1" end="3" type="uint"/> > > <field name="Single Program Flow" start="63" end="63" type="bool"/> > > diff --git a/src/intel/genxml/gen5.xml b/src/intel/genxml/gen5.xml > > index 3d80de9cf1e..c9a18dd18e3 100644 > > --- a/src/intel/genxml/gen5.xml > > +++ b/src/intel/genxml/gen5.xml > > @@ -857,13 +857,13 @@ > > <field name="URB Entry Allocation Size" start="147" end="151" > > type="uint"/> > > <field name="Number of URB Entries" start="139" end="146" type="uint"/> > > <field name="Statistics Enable" start="138" end="138" type="bool"/> > > <field name="Sampler State Offset" start="165" end="191" > > type="address"/> > > <field name="Sampler Count" start="160" end="162" type="uint"/> > > <field name="Vertex Cache Disable" start="193" end="193" type="bool"/> > > - <field name="Function Enable" start="192" end="192" type="bool"/> > > + <field name="Enable" start="192" end="192" type="bool"/> > > </struct> > > > > <struct name="WM_STATE" length="11"> > > <field name="Kernel Start Pointer" start="6" end="31" type="offset"/> > > <field name="GRF Register Count" start="1" end="3" type="uint"/> > > <field name="Single Program Flow" start="63" end="63" type="bool"/> > > diff --git a/src/mesa/drivers/dri/i965/Makefile.sources > > b/src/mesa/drivers/dri/i965/Makefile.sources > > index 9e567cbc908..349777cf24a 100644 > > --- a/src/mesa/drivers/dri/i965/Makefile.sources > > +++ b/src/mesa/drivers/dri/i965/Makefile.sources > > @@ -67,13 +67,12 @@ i965_FILES = \ > > brw_tex_layout.c \ > > brw_urb.c \ > > brw_util.c \ > > brw_util.h \ > > brw_vs.c \ > > brw_vs.h \ > > - brw_vs_state.c \ > > brw_vs_surface_state.c \ > > brw_wm.c \ > > brw_wm.h \ > > brw_wm_state.c \ > > brw_wm_surface_state.c \ > > gen6_clip_state.c \ > > diff --git a/src/mesa/drivers/dri/i965/brw_vs_state.c > > b/src/mesa/drivers/dri/i965/brw_vs_state.c > > deleted file mode 100644 > > index fafe305f4f7..00000000000 > > --- a/src/mesa/drivers/dri/i965/brw_vs_state.c > > +++ /dev/null > > @@ -1,192 +0,0 @@ > > -/* > > - Copyright (C) Intel Corp. 2006. All Rights Reserved. > > - Intel funded Tungsten Graphics to > > - develop this 3D driver. > > - > > - Permission is hereby granted, free of charge, to any person obtaining > > - a copy of this software and associated documentation files (the > > - "Software"), to deal in the Software without restriction, including > > - without limitation the rights to use, copy, modify, merge, publish, > > - distribute, sublicense, and/or sell copies of the Software, and to > > - permit persons to whom the Software is furnished to do so, subject to > > - the following conditions: > > - > > - The above copyright notice and this permission notice (including the > > - next paragraph) shall be included in all copies or substantial > > - portions of the Software. > > - > > - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, > > - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > > - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. > > - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE > > - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION > > - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION > > - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. > > - > > - **********************************************************************/ > > - /* > > - * Authors: > > - * Keith Whitwell <kei...@vmware.com> > > - */ > > - > > - > > - > > -#include "intel_batchbuffer.h" > > -#include "brw_context.h" > > -#include "brw_state.h" > > -#include "brw_defines.h" > > -#include "main/macros.h" > > - > > -static void > > -brw_upload_vs_unit(struct brw_context *brw) > > -{ > > - const struct gen_device_info *devinfo = &brw->screen->devinfo; > > - struct brw_stage_state *stage_state = &brw->vs.base; > > - const struct brw_stage_prog_data *prog_data = stage_state->prog_data; > > - const struct brw_vue_prog_data *vue_prog_data = > > - brw_vue_prog_data(stage_state->prog_data); > > - > > - struct brw_vs_unit_state *vs; > > It looks like brw_vs_unit_state is not used anywhere else, so maybe we could > delete it too. Also, it looks weird that some fields in vs->thread0 don't > really match the dw1 in the docs for gen5 at least, but they are not used > anyway. > > > - vs = brw_state_batch(brw, sizeof(*vs), 32, &stage_state->state_offset); > > - memset(vs, 0, sizeof(*vs)); > > - > > - /* BRW_NEW_PROGRAM_CACHE | BRW_NEW_VS_PROG_DATA */ > > - vs->thread0.grf_reg_count = ALIGN(vue_prog_data->total_grf, 16) / 16 - > > 1; > > - vs->thread0.kernel_start_pointer = > > - brw_program_reloc(brw, > > - stage_state->state_offset + > > - offsetof(struct brw_vs_unit_state, thread0), > > - stage_state->prog_offset + > > - (vs->thread0.grf_reg_count << 1)) >> 6; > > - > > - if (prog_data->use_alt_mode) > > - vs->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; > > - else > > - vs->thread1.floating_point_mode = BRW_FLOATING_POINT_IEEE_754; > > - > > - /* Choosing multiple program flow means that we may get 2-vertex > > threads, > > - * which will have the channel mask for dwords 4-7 enabled in the > > thread, > > - * and those dwords will be written to the second URB handle when we > > - * brw_urb_WRITE() results. > > - */ > > - /* Force single program flow on Ironlake. We cannot reliably get > > - * all applications working without it. See: > > - * https://bugs.freedesktop.org/show_bug.cgi?id=29172 > > - * > > - * The most notable and reliably failing application is the Humus > > - * demo "CelShading" > > - */ > > - vs->thread1.single_program_flow = (brw->gen == 5); > > - > > - vs->thread1.binding_table_entry_count = > > - prog_data->binding_table.size_bytes / 4; > > - > > - if (prog_data->total_scratch != 0) { > > - vs->thread2.scratch_space_base_pointer = > > - stage_state->scratch_bo->offset64 >> 10; /* reloc */ > > - vs->thread2.per_thread_scratch_space = > > - ffs(stage_state->per_thread_scratch) - 11; > > - } else { > > - vs->thread2.scratch_space_base_pointer = 0; > > - vs->thread2.per_thread_scratch_space = 0; > > - } > > - > > - vs->thread3.urb_entry_read_length = vue_prog_data->urb_read_length; > > - vs->thread3.const_urb_entry_read_length = prog_data->curb_read_length; > > - vs->thread3.dispatch_grf_start_reg = prog_data->dispatch_grf_start_reg; > > - vs->thread3.urb_entry_read_offset = 0; > > - > > - /* BRW_NEW_PUSH_CONSTANT_ALLOCATION */ > > - vs->thread3.const_urb_entry_read_offset = brw->curbe.vs_start * 2; > > - > > - /* BRW_NEW_URB_FENCE */ > > - if (brw->gen == 5) { > > - switch (brw->urb.nr_vs_entries) { > > - case 8: > > - case 12: > > - case 16: > > - case 32: > > - case 64: > > - case 96: > > - case 128: > > - case 168: > > - case 192: > > - case 224: > > - case 256: > > - vs->thread4.nr_urb_entries = brw->urb.nr_vs_entries >> 2; > > - break; > > - default: > > - unreachable("not reached"); > > - } > > - } else { > > - switch (brw->urb.nr_vs_entries) { > > - case 8: > > - case 12: > > - case 16: > > - case 32: > > - break; > > - case 64: > > - assert(brw->is_g4x); > > - break; > > - default: > > - unreachable("not reached"); > > - } > > - vs->thread4.nr_urb_entries = brw->urb.nr_vs_entries; > > - } > > - > > - vs->thread4.urb_entry_allocation_size = brw->urb.vsize - 1; > > - > > - vs->thread4.max_threads = CLAMP(brw->urb.nr_vs_entries / 2, > > - 1, devinfo->max_vs_threads) - 1; > > - > > - if (brw->gen == 5) > > - vs->vs5.sampler_count = 0; /* hardware requirement */ > > - else { > > - vs->vs5.sampler_count = (stage_state->sampler_count + 3) / 4; > > - } > > - > > - /* Vertex program always enabled: > > - */ > > - vs->vs6.vs_enable = 1; > > - > > - /* Set the sampler state pointer, and its reloc > > - */ > > - if (stage_state->sampler_count) { > > - /* BRW_NEW_SAMPLER_STATE_TABLE - reloc */ > > - vs->vs5.sampler_state_pointer = > > - (brw->batch.bo->offset64 + stage_state->sampler_offset) >> 5; > > - brw_emit_reloc(&brw->batch, > > - stage_state->state_offset + > > - offsetof(struct brw_vs_unit_state, vs5), > > - brw->batch.bo, > > - (stage_state->sampler_offset | vs->vs5.sampler_count), > > - I915_GEM_DOMAIN_INSTRUCTION, 0); > > - } > > - > > - /* Emit scratch space relocation */ > > - if (prog_data->total_scratch != 0) { > > - brw_emit_reloc(&brw->batch, > > - stage_state->state_offset + > > - offsetof(struct brw_vs_unit_state, thread2), > > - stage_state->scratch_bo, > > - vs->thread2.per_thread_scratch_space, > > - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER); > > - } > > - > > - brw->ctx.NewDriverState |= BRW_NEW_GEN4_UNIT_STATE; > > -} > > - > > -const struct brw_tracked_state brw_vs_unit = { > > - .dirty = { > > - .mesa = 0, > > - .brw = BRW_NEW_BATCH | > > If I understood correctly, BRW_NEW_BATCH will get covered by BRW_NEW_CONTEXT > when needed, right? > > > - BRW_NEW_BLORP | > > - BRW_NEW_PROGRAM_CACHE | > > - BRW_NEW_PUSH_CONSTANT_ALLOCATION | > > - BRW_NEW_SAMPLER_STATE_TABLE | > > - BRW_NEW_URB_FENCE | > > - BRW_NEW_VS_PROG_DATA, > > - }, > > - .emit = brw_upload_vs_unit, > > -}; > > diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c > > b/src/mesa/drivers/dri/i965/genX_state_upload.c > > index 4c6cb1a9b71..bc7f068d5bb 100644 > > --- a/src/mesa/drivers/dri/i965/genX_state_upload.c > > +++ b/src/mesa/drivers/dri/i965/genX_state_upload.c > > @@ -130,12 +130,23 @@ instruction_bo(struct brw_bo *bo, uint32_t offset) > > .read_domains = I915_GEM_DOMAIN_INSTRUCTION, > > .write_domain = I915_GEM_DOMAIN_INSTRUCTION, > > }; > > } > > > > static inline struct brw_address > > +instruction_ro_bo(struct brw_bo *bo, uint32_t offset) > > +{ > > + return (struct brw_address) { > > + .bo = bo, > > + .offset = offset, > > + .read_domains = I915_GEM_DOMAIN_INSTRUCTION, > > + .write_domain = 0, > > + }; > > +} > > + > > +static inline struct brw_address > > vertex_bo(struct brw_bo *bo, uint32_t offset) > > { > > return (struct brw_address) { > > .bo = bo, > > .offset = offset, > > .read_domains = I915_GEM_DOMAIN_VERTEX, > > @@ -1690,14 +1701,28 @@ static const struct brw_tracked_state > > genX(wm_state) = { > > .emit = genX(upload_wm), > > }; > > #endif > > > > /* ---------------------------------------------------------------------- > > */ > > > > +#if GEN_GEN == 4 > > +static inline struct brw_address > > +KSP(struct brw_context *brw, uint32_t offset) > > +{ > > + return instruction_bo(brw->cache.bo, offset); > > +} > > +#else > > +static inline uint32_t > > +KSP(struct brw_context *brw, uint32_t offset) > > +{ > > + return offset; > > +} > > +#endif > > + > > #define INIT_THREAD_DISPATCH_FIELDS(pkt, prefix) \ > > - pkt.KernelStartPointer = stage_state->prog_offset; \ > > + pkt.KernelStartPointer = KSP(brw, stage_state->prog_offset); \ > > pkt.SamplerCount = \ > > DIV_ROUND_UP(CLAMP(stage_state->sampler_count, 0, 16), 4); \ > > pkt.BindingTableEntryCount = \ > > stage_prog_data->binding_table.size_bytes / 4; \ > > pkt.FloatingPointMode = stage_prog_data->use_alt_mode; \ > > \ > > @@ -1713,18 +1738,18 @@ static const struct brw_tracked_state > > genX(wm_state) = { > > pkt.prefix##URBEntryReadLength = vue_prog_data->urb_read_length; \ > > pkt.prefix##URBEntryReadOffset = 0; \ > > \ > > pkt.StatisticsEnable = true; \ > > pkt.Enable = true; > > > > -#if GEN_GEN >= 6 > > static void > > genX(upload_vs_state)(struct brw_context *brw) > > { > > + UNUSED struct gl_context *ctx = &brw->ctx; > > const struct gen_device_info *devinfo = &brw->screen->devinfo; > > - const struct brw_stage_state *stage_state = &brw->vs.base; > > + struct brw_stage_state *stage_state = &brw->vs.base; > > > > /* BRW_NEW_VS_PROG_DATA */ > > const struct brw_vue_prog_data *vue_prog_data = > > brw_vue_prog_data(brw->vs.base.prog_data); > > const struct brw_stage_prog_data *stage_prog_data = > > &vue_prog_data->base; > > > > @@ -1752,17 +1777,50 @@ genX(upload_vs_state)(struct brw_context *brw) > > } > > #endif > > > > if (GEN_GEN == 7 && devinfo->is_ivybridge) > > gen7_emit_vs_workaround_flush(brw); > > > > +#if GEN_GEN >= 6 > > brw_batch_emit(brw, GENX(3DSTATE_VS), vs) { > > +#else > > + ctx->NewDriverState |= BRW_NEW_GEN4_UNIT_STATE; > > + brw_state_emit(brw, GENX(VS_STATE), 32, &stage_state->state_offset, vs) > > { > > +#endif > > INIT_THREAD_DISPATCH_FIELDS(vs, Vertex); > > > > vs.MaximumNumberofThreads = devinfo->max_vs_threads - 1; > > > > +#if GEN_GEN < 6 > > + vs.GRFRegisterCount = DIV_ROUND_UP(vue_prog_data->total_grf, 16) - 1; > > + vs.ConstantURBEntryReadLength = stage_prog_data->curb_read_length; > > + vs.ConstantURBEntryReadOffset = brw->curbe.vs_start * 2; > > + > > + vs.NumberofURBEntries = brw->urb.nr_vs_entries >> (GEN_GEN == 5 ? 2 > > : 0); > > I was looking and it seems that NumberofURBEntries should be bits 11:17, while > bit 18 should be MBZ. It shouldn't matter much here, just the checks that > genX_pack.h do. Also it's my fault, since I submitted the xml without checking > everything, so I can send a patch to fix that if you prefer. > > Anyway, this patch is: > Reviewed-by: Rafael Antognolli <rafael.antogno...@intel.com> > > > + vs.URBEntryAllocationSize = brw->urb.vsize - 1; > > + > > + vs.MaximumNumberofThreads = > > + CLAMP(brw->urb.nr_vs_entries / 2, 1, devinfo->max_vs_threads) - 1; > > + > > + vs.StatisticsEnable = false; > > + vs.SamplerStateOffset = > > + instruction_ro_bo(brw->batch.bo, stage_state->sampler_offset); > > +#endif > > + > > +#if GEN_GEN == 5 > > + /* Force single program flow on Ironlake. We cannot reliably get > > + * all applications working without it. See: > > + * https://bugs.freedesktop.org/show_bug.cgi?id=29172 > > + * > > + * The most notable and reliably failing application is the Humus > > + * demo "CelShading" > > + */ > > + vs.SingleProgramFlow = true; > > + vs.SamplerCount = 0; /* hardware requirement */ > > +#endif > > + > > #if GEN_GEN >= 8 > > vs.SIMD8DispatchEnable = > > vue_prog_data->dispatch_mode == DISPATCH_MODE_SIMD8; > > > > vs.UserClipDistanceCullTestEnableBitmask = > > vue_prog_data->cull_distance_mask; > > @@ -1798,17 +1856,21 @@ static const struct brw_tracked_state > > genX(vs_state) = { > > .dirty = { > > .mesa = (GEN_GEN == 6 ? (_NEW_PROGRAM_CONSTANTS | _NEW_TRANSFORM) : > > 0), > > .brw = BRW_NEW_BATCH | > > BRW_NEW_BLORP | > > BRW_NEW_CONTEXT | > > BRW_NEW_VS_PROG_DATA | > > - (GEN_GEN == 6 ? BRW_NEW_VERTEX_PROGRAM : 0), > > + (GEN_GEN == 6 ? BRW_NEW_VERTEX_PROGRAM : 0) | > > + (GEN_GEN <= 5 ? BRW_NEW_PUSH_CONSTANT_ALLOCATION | > > + BRW_NEW_PROGRAM_CACHE | > > + BRW_NEW_SAMPLER_STATE_TABLE | > > + BRW_NEW_URB_FENCE > > + : 0), > > }, > > .emit = genX(upload_vs_state), > > }; > > -#endif > > > > /* ---------------------------------------------------------------------- > > */ > > > > #if GEN_GEN >= 6 > > static void > > brw_calculate_guardband_size(const struct gen_device_info *devinfo, > > @@ -3881,13 +3943,13 @@ genX(init_atoms)(struct brw_context *brw) > > &brw_vs_samplers, > > > > /* These set up state for brw_psp_urb_cbs */ > > &brw_wm_unit, > > &brw_sf_vp, > > &brw_sf_unit, > > - &brw_vs_unit, /* always required, enabled or not */ > > + &genX(vs_state), /* always required, enabled or not */ > > &brw_clip_unit, > > &brw_gs_unit, > > > > /* Command packets: > > */ > > &brw_invariant_state, > > -- > > 2.12.2 > > > > _______________________________________________ > > mesa-dev mailing list > > mesa-dev@lists.freedesktop.org > > https://lists.freedesktop.org/mailman/listinfo/mesa-dev > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev