On Fri, Aug 19, 2016 at 09:55:53AM -0700, Jason Ekstrand wrote: > --- > src/mesa/drivers/dri/i965/Makefile.sources | 15 +- > src/mesa/drivers/dri/i965/blorp_priv.h | 2 +- > src/mesa/drivers/dri/i965/genX_blorp_exec.c | 1113 +------------------------- > src/mesa/drivers/dri/i965/genX_blorp_exec.h | 1121 > +++++++++++++++++++++++++++ > 4 files changed, 1133 insertions(+), 1118 deletions(-) > create mode 100644 src/mesa/drivers/dri/i965/genX_blorp_exec.h > > diff --git a/src/mesa/drivers/dri/i965/Makefile.sources > b/src/mesa/drivers/dri/i965/Makefile.sources > index 5ea7b96..c97486c 100644 > --- a/src/mesa/drivers/dri/i965/Makefile.sources > +++ b/src/mesa/drivers/dri/i965/Makefile.sources > @@ -259,16 +259,21 @@ i965_FILES = \ > intel_upload.c > > i965_gen6_FILES = \ > - genX_blorp_exec.c > + genX_blorp_exec.c \ > + genX_blorp_exec.h > > i965_gen7_FILES = \ > - genX_blorp_exec.c > + genX_blorp_exec.c \ > + genX_blorp_exec.h > > i965_gen75_FILES = \ > - genX_blorp_exec.c > + genX_blorp_exec.c \ > + genX_blorp_exec.h > > i965_gen8_FILES = \ > - genX_blorp_exec.c > + genX_blorp_exec.c \ > + genX_blorp_exec.h > > i965_gen9_FILES = \ > - genX_blorp_exec.c > + genX_blorp_exec.c \ > + genX_blorp_exec.h > diff --git a/src/mesa/drivers/dri/i965/blorp_priv.h > b/src/mesa/drivers/dri/i965/blorp_priv.h > index 977f54d..9b987a8 100644 > --- a/src/mesa/drivers/dri/i965/blorp_priv.h > +++ b/src/mesa/drivers/dri/i965/blorp_priv.h > @@ -141,7 +141,7 @@ struct brw_blorp_prog_data > */ > uint32_t flat_inputs; > unsigned num_varying_inputs; > - GLbitfield64 inputs_read; > + uint64_t inputs_read; > }; > > static inline unsigned > diff --git a/src/mesa/drivers/dri/i965/genX_blorp_exec.c > b/src/mesa/drivers/dri/i965/genX_blorp_exec.c > index 8c15b16..e07fa0a 100644 > --- a/src/mesa/drivers/dri/i965/genX_blorp_exec.c > +++ b/src/mesa/drivers/dri/i965/genX_blorp_exec.c > @@ -29,9 +29,7 @@ > #include "brw_context.h" > #include "brw_state.h" > > -#include "blorp_priv.h" > - > -#include "genxml/gen_macros.h" > +#include "genX_blorp_exec.h" > > static void * > blorp_emit_dwords(struct blorp_context *blorp, void *batch, unsigned n) > @@ -168,1115 +166,6 @@ blorp_emit_3dstate_multisample(struct blorp_context > *blorp, void *batch, > #endif > } > > -struct blorp_batch { > - struct blorp_context *blorp; > - void *batch; > -}; > - > -#define __gen_address_type struct blorp_address > -#define __gen_user_data struct blorp_batch > - > -static uint64_t > -__gen_combine_address(struct blorp_batch *batch, void *location, > - struct blorp_address address, uint32_t delta) > -{ > - if (address.buffer == NULL) { > - return address.offset + delta; > - } else { > - return blorp_emit_reloc(batch->blorp, batch->batch, > - location, address, delta); > - } > -} > - > -#include "genxml/genX_pack.h" > - > -#define _blorp_cmd_length(cmd) cmd ## _length > -#define _blorp_cmd_length_bias(cmd) cmd ## _length_bias > -#define _blorp_cmd_header(cmd) cmd ## _header > -#define _blorp_cmd_pack(cmd) cmd ## _pack > - > -#define blorp_emit(batch, cmd, name) \ > - for (struct cmd name = { _blorp_cmd_header(cmd) }, \ > - *_dst = blorp_emit_dwords(batch.blorp, batch.batch, \ > - _blorp_cmd_length(cmd)); \ > - __builtin_expect(_dst != NULL, 1); \ > - _blorp_cmd_pack(cmd)(&batch, (void *)_dst, &name), \ > - _dst = NULL) > - > -#define blorp_emitn(batch, cmd, n) ({ \ > - uint32_t *_dw = blorp_emit_dwords(batch.blorp, batch.batch, n); \ > - struct cmd template = { \ > - _blorp_cmd_header(cmd), \ > - .DWordLength = n - _blorp_cmd_length_bias(cmd), \ > - }; \ > - _blorp_cmd_pack(cmd)(&batch, _dw, &template); \ > - _dw + 1; /* Array starts at dw[1] */ \ > - }) > - > -/* Once vertex fetcher has written full VUE entries with complete > - * header the space requirement is as follows per vertex (in bytes): > - * > - * Header Position Program constants > - * +--------+------------+-------------------+ > - * | 16 | 16 | n x 16 | > - * +--------+------------+-------------------+ > - * > - * where 'n' stands for number of varying inputs expressed as vec4s. > - * > - * The URB size is in turn expressed in 64 bytes (512 bits). > - */ > -static inline unsigned > -gen7_blorp_get_vs_entry_size(const struct brw_blorp_params *params) > -{ > - const unsigned num_varyings = > - params->wm_prog_data ? params->wm_prog_data->num_varying_inputs : 0; > - const unsigned total_needed = 16 + 16 + num_varyings * 16; > - > - return DIV_ROUND_UP(total_needed, 64); > -} > - > -/* 3DSTATE_URB > -/* 3DSTATE_URB_VS > - * 3DSTATE_URB_HS > - * 3DSTATE_URB_DS > - * 3DSTATE_URB_GS > - * > - * Assign the entire URB to the VS. Even though the VS disabled, URB space > - * is still needed because the clipper loads the VUE's from the URB. From > - * the Sandybridge PRM, Volume 2, Part 1, Section 3DSTATE, > - * Dword 1.15:0 "VS Number of URB Entries": > - * This field is always used (even if VS Function Enable is DISABLED). > - * > - * The warning below appears in the PRM (Section 3DSTATE_URB), but we can > - * safely ignore it because this batch contains only one draw call. > - * Because of URB corruption caused by allocating a previous GS unit > - * URB entry to the VS unit, software is required to send a ???GS NULL > - * Fence??? (Send URB fence with VS URB size == 1 and GS URB size == 0) > - * plus a dummy DRAW call before any case where VS will be taking over > - * GS URB space.
You are dropping all the documentation above. > - * > - * If the 3DSTATE_URB_VS is emitted, than the others must be also. > - * From the Ivybridge PRM, Volume 2 Part 1, section 1.7.1 3DSTATE_URB_VS: > - * > - * 3DSTATE_URB_HS, 3DSTATE_URB_DS, and 3DSTATE_URB_GS must also be > - * programmed in order for the programming of this state to be > - * valid. > - */ > -static void > -emit_urb_config(struct blorp_batch batch, > - const struct brw_blorp_params *params) > -{ > - blorp_emit_urb_config(batch.blorp, batch.batch, > - gen7_blorp_get_vs_entry_size(params)); > -} > - > -static void > -blorp_emit_vertex_data(struct blorp_batch batch, > - const struct brw_blorp_params *params, > - struct blorp_address *addr, > - uint32_t *size) > -{ > - const float vertices[] = { > - /* v0 */ (float)params->x0, (float)params->y1, > - /* v1 */ (float)params->x1, (float)params->y1, > - /* v2 */ (float)params->x0, (float)params->y0, > - }; > - > - void *data = blorp_alloc_vertex_buffer(batch.blorp, sizeof(vertices), > addr); > - memcpy(data, vertices, sizeof(vertices)); > - *size = sizeof(vertices); > -} > - > -static void > -blorp_emit_input_varying_data(struct blorp_batch batch, > - const struct brw_blorp_params *params, > - struct blorp_address *addr, > - uint32_t *size) > -{ > - const unsigned vec4_size_in_bytes = 4 * sizeof(float); > - const unsigned max_num_varyings = > - DIV_ROUND_UP(sizeof(params->wm_inputs), vec4_size_in_bytes); > - const unsigned num_varyings = params->wm_prog_data->num_varying_inputs; > - > - *size = num_varyings * vec4_size_in_bytes; > - > - const float *const inputs_src = (const float *)¶ms->wm_inputs; > - float *inputs = blorp_alloc_vertex_buffer(batch.blorp, *size, addr); > - > - /* Walk over the attribute slots, determine if the attribute is used by > - * the program and when necessary copy the values from the input storage > to > - * the vertex data buffer. > - */ > - for (unsigned i = 0; i < max_num_varyings; i++) { > - const gl_varying_slot attr = VARYING_SLOT_VAR0 + i; > - > - if (!(params->wm_prog_data->inputs_read & BITFIELD64_BIT(attr))) > - continue; > - > - memcpy(inputs, inputs_src + i * 4, vec4_size_in_bytes); > - > - inputs += 4; > - } > -} > - > -static void > -blorp_emit_vertex_buffers(struct blorp_batch batch, > - const struct brw_blorp_params *params) > -{ > - struct GENX(VERTEX_BUFFER_STATE) vb[2]; > - memset(vb, 0, sizeof(vb)); > - > - unsigned num_buffers = 1; > - > - uint32_t size; > - blorp_emit_vertex_data(batch, params, &vb[0].BufferStartingAddress, > &size); > - vb[0].VertexBufferIndex = 0; > - vb[0].BufferPitch = 2 * sizeof(float); > - vb[0].VertexBufferMOCS = batch.blorp->mocs.vb; > -#if GEN_GEN >= 7 > - vb[0].AddressModifyEnable = true; > -#endif > -#if GEN_GEN >= 8 > - vb[0].BufferSize = size; > -#else > - vb[0].BufferAccessType = VERTEXDATA; > - vb[0].EndAddress = vb[0].BufferStartingAddress; > - vb[0].EndAddress.offset += size - 1; > -#endif > - > - if (params->wm_prog_data && params->wm_prog_data->num_varying_inputs) { > - blorp_emit_input_varying_data(batch, params, > - &vb[1].BufferStartingAddress, &size); > - vb[1].VertexBufferIndex = 1; > - vb[1].BufferPitch = 0; > - vb[1].VertexBufferMOCS = batch.blorp->mocs.vb; > -#if GEN_GEN >= 7 > - vb[1].AddressModifyEnable = true; > -#endif > -#if GEN_GEN >= 8 > - vb[1].BufferSize = size; > -#else > - vb[1].BufferAccessType = INSTANCEDATA; > - vb[1].EndAddress = vb[1].BufferStartingAddress; > - vb[1].EndAddress.offset += size - 1; > -#endif > - num_buffers++; > - } > - > - const unsigned num_dwords = > - 1 + GENX(VERTEX_BUFFER_STATE_length) * num_buffers; > - uint32_t *dw = blorp_emitn(batch, GENX(3DSTATE_VERTEX_BUFFERS), > num_dwords); > - > - for (unsigned i = 0; i < num_buffers; i++) { > - GENX(VERTEX_BUFFER_STATE_pack)(&batch, dw, &vb[i]); > - dw += GENX(VERTEX_BUFFER_STATE_length); > - } > -} > - > -static void > -blorp_emit_vertex_elements(struct blorp_batch batch, > - const struct brw_blorp_params *params) > -{ > - const unsigned num_varyings = > - params->wm_prog_data ? params->wm_prog_data->num_varying_inputs : 0; > - const unsigned num_elements = 2 + num_varyings; > - > - struct GENX(VERTEX_ELEMENT_STATE) ve[num_elements]; > - memset(ve, 0, num_elements * sizeof(*ve)); > - > - /* Setup VBO for the rectangle primitive.. > - * > - * A rectangle primitive (3DPRIM_RECTLIST) consists of only three > - * vertices. The vertices reside in screen space with DirectX > - * coordinates (that is, (0, 0) is the upper left corner). > - * > - * v2 ------ implied > - * | | > - * | | > - * v0 ----- v1 > - * > - * Since the VS is disabled, the clipper loads each VUE directly from > - * the URB. This is controlled by the 3DSTATE_VERTEX_BUFFERS and > - * 3DSTATE_VERTEX_ELEMENTS packets below. The VUE contents are as follows: > - * dw0: Reserved, MBZ. > - * dw1: Render Target Array Index. The HiZ op does not use indexed > - * vertices, so set the dword to 0. > - * dw2: Viewport Index. The HiZ op disables viewport mapping and > - * scissoring, so set the dword to 0. > - * dw3: Point Width: The HiZ op does not emit the POINTLIST primitive, > - * so set the dword to 0. > - * dw4: Vertex Position X. > - * dw5: Vertex Position Y. > - * dw6: Vertex Position Z. > - * dw7: Vertex Position W. > - * > - * dw8: Flat vertex input 0 > - * dw9: Flat vertex input 1 > - * ... > - * dwn: Flat vertex input n - 8 > - * > - * For details, see the Sandybridge PRM, Volume 2, Part 1, Section 1.5.1 > - * "Vertex URB Entry (VUE) Formats". > - * > - * Only vertex position X and Y are going to be variable, Z is fixed to > - * zero and W to one. Header words dw0-3 are all zero. There is no need to > - * include the fixed values in the vertex buffer. Vertex fetcher can be > - * instructed to fill vertex elements with constant values of one and zero > - * instead of reading them from the buffer. > - * Flat inputs are program constants that are not interpolated. Moreover > - * their values will be the same between vertices. > - * > - * See the vertex element setup below. > - */ > - ve[0].VertexBufferIndex = 0; > - ve[0].Valid = true; > - ve[0].SourceElementFormat = ISL_FORMAT_R32G32B32A32_FLOAT; > - ve[0].SourceElementOffset = 0; > - ve[0].Component0Control = VFCOMP_STORE_0; > - ve[0].Component1Control = VFCOMP_STORE_0; > - ve[0].Component2Control = VFCOMP_STORE_0; > - ve[0].Component3Control = VFCOMP_STORE_0; > - > - ve[1].VertexBufferIndex = 0; > - ve[1].Valid = true; > - ve[1].SourceElementFormat = ISL_FORMAT_R32G32_FLOAT; > - ve[1].SourceElementOffset = 0; > - ve[1].Component0Control = VFCOMP_STORE_SRC; > - ve[1].Component1Control = VFCOMP_STORE_SRC; > - ve[1].Component2Control = VFCOMP_STORE_0; > - ve[1].Component3Control = VFCOMP_STORE_1_FP; > - > - for (unsigned i = 0; i < num_varyings; ++i) { > - ve[i + 2].VertexBufferIndex = 1; > - ve[i + 2].Valid = true; > - ve[i + 2].SourceElementFormat = ISL_FORMAT_R32G32B32A32_FLOAT; > - ve[i + 2].SourceElementOffset = i * 4 * sizeof(float); > - ve[i + 2].Component0Control = VFCOMP_STORE_SRC; > - ve[i + 2].Component1Control = VFCOMP_STORE_SRC; > - ve[i + 2].Component2Control = VFCOMP_STORE_SRC; > - ve[i + 2].Component3Control = VFCOMP_STORE_SRC; > - } > - > - const unsigned num_dwords = > - 1 + GENX(VERTEX_ELEMENT_STATE_length) * num_elements; > - uint32_t *dw = blorp_emitn(batch, GENX(3DSTATE_VERTEX_ELEMENTS), > num_dwords); > - > - for (unsigned i = 0; i < num_elements; i++) { > - GENX(VERTEX_ELEMENT_STATE_pack)(&batch, dw, &ve[i]); > - dw += GENX(VERTEX_ELEMENT_STATE_length); > - } > - > -#if GEN_GEN >= 8 > - blorp_emit(batch, GENX(3DSTATE_VF_SGVS), sgvs); > - > - for (unsigned i = 0; i < num_elements; i++) { > - blorp_emit(batch, GENX(3DSTATE_VF_INSTANCING), vf) { > - vf.VertexElementIndex = i; > - vf.InstancingEnable = false; > - } > - } > - > - blorp_emit(batch, GENX(3DSTATE_VF_TOPOLOGY), topo) { > - topo.PrimitiveTopologyType = _3DPRIM_RECTLIST; > - } > -#endif > -} > - > -static void > -blorp_emit_sf_config(struct blorp_batch batch, > - const struct brw_blorp_params *params) > -{ > - const struct brw_blorp_prog_data *prog_data = params->wm_prog_data; > - > - /* 3DSTATE_SF > - * > - * Disable ViewportTransformEnable (dw2.1) > - * > - * From the SandyBridge PRM, Volume 2, Part 1, Section 1.3, "3D > - * Primitives Overview": > - * RECTLIST: Viewport Mapping must be DISABLED (as is typical with the > - * use of screen- space coordinates). > - * > - * A solid rectangle must be rendered, so set FrontFaceFillMode (dw2.4:3) > - * and BackFaceFillMode (dw2.5:6) to SOLID(0). > - * > - * From the Sandy Bridge PRM, Volume 2, Part 1, Section > - * 6.4.1.1 3DSTATE_SF, Field FrontFaceFillMode: > - * SOLID: Any triangle or rectangle object found to be front-facing > - * is rendered as a solid object. This setting is required when > - * (rendering rectangle (RECTLIST) objects. > - */ And all this. > - > -#if GEN_GEN >= 8 > - > - blorp_emit(batch, GENX(3DSTATE_SF), sf); > - > - blorp_emit(batch, GENX(3DSTATE_RASTER), raster) { > - raster.CullMode = CULLMODE_NONE; > - } > - > - blorp_emit(batch, GENX(3DSTATE_SBE), sbe) { > - sbe.VertexURBEntryReadOffset = BRW_SF_URB_ENTRY_READ_OFFSET; > - sbe.NumberofSFOutputAttributes = prog_data->num_varying_inputs; > - sbe.VertexURBEntryReadLength = brw_blorp_get_urb_length(prog_data); > - sbe.ForceVertexURBEntryReadLength = true; > - sbe.ForceVertexURBEntryReadOffset = true; > - sbe.ConstantInterpolationEnable = prog_data->flat_inputs; > - > -#if GEN_GEN >= 9 > - for (unsigned i = 0; i < 32; i++) > - sbe.AttributeActiveComponentFormat[i] = ACF_XYZW; > -#endif > - } > - > -#elif GEN_GEN >= 7 > - > - blorp_emit(batch, GENX(3DSTATE_SF), sf) { > - sf.FrontFaceFillMode = FILL_MODE_SOLID; > - sf.BackFaceFillMode = FILL_MODE_SOLID; > - > - sf.MultisampleRasterizationMode = params->dst.surf.samples > 1 ? > - MSRASTMODE_ON_PATTERN : MSRASTMODE_OFF_PIXEL; > - > -#if GEN_GEN == 7 > - sf.DepthBufferSurfaceFormat = params->depth_format; > -#endif > - } > - > - blorp_emit(batch, GENX(3DSTATE_SBE), sbe) { > - sbe.VertexURBEntryReadOffset = BRW_SF_URB_ENTRY_READ_OFFSET; > - if (prog_data) { > - sbe.NumberofSFOutputAttributes = prog_data->num_varying_inputs; > - sbe.VertexURBEntryReadLength = brw_blorp_get_urb_length(prog_data); > - sbe.ConstantInterpolationEnable = prog_data->flat_inputs; > - } else { > - sbe.NumberofSFOutputAttributes = 0; > - sbe.VertexURBEntryReadLength = 1; > - } > - } > - > -#else /* GEN_GEN <= 6 */ > - > - blorp_emit(batch, GENX(3DSTATE_SF), sf) { > - sf.FrontFaceFillMode = FILL_MODE_SOLID; > - sf.BackFaceFillMode = FILL_MODE_SOLID; > - > - sf.MultisampleRasterizationMode = params->dst.surf.samples > 1 ? > - MSRASTMODE_ON_PATTERN : MSRASTMODE_OFF_PIXEL; > - > - sf.VertexURBEntryReadOffset = BRW_SF_URB_ENTRY_READ_OFFSET; > - if (prog_data) { > - sf.NumberofSFOutputAttributes = prog_data->num_varying_inputs; > - sf.VertexURBEntryReadLength = brw_blorp_get_urb_length(prog_data); > - sf.ConstantInterpolationEnable = prog_data->flat_inputs; > - } else { > - sf.NumberofSFOutputAttributes = 0; > - sf.VertexURBEntryReadLength = 1; > - } > - } > - > -#endif /* GEN_GEN */ > -} > - > -static void > -blorp_emit_ps_config(struct blorp_batch batch, > - const struct brw_blorp_params *params) > -{ > - const struct brw_blorp_prog_data *prog_data = params->wm_prog_data; > - > - /* Even when thread dispatch is disabled, max threads (dw5.25:31) must be > - * nonzero to prevent the GPU from hanging. While the documentation > doesn't > - * mention this explicitly, it notes that the valid range for the field is > - * [1,39] = [2,40] threads, which excludes zero. > - * > - * To be safe (and to minimize extraneous code) we go ahead and fully > - * configure the WM state whether or not there is a WM program. > - */ And here. > - > -#if GEN_GEN >= 8 > - > - blorp_emit(batch, GENX(3DSTATE_WM), wm); > - > - blorp_emit(batch, GENX(3DSTATE_PS), ps) { > - if (params->src.addr.buffer) { > - ps.SamplerCount = 1; /* Up to 4 samplers */ > - ps.BindingTableEntryCount = 2; > - } else { > - ps.BindingTableEntryCount = 1; > - } > - > - ps.DispatchGRFStartRegisterForConstantSetupData0 = > - prog_data->first_curbe_grf_0; > - ps.DispatchGRFStartRegisterForConstantSetupData2 = > - prog_data->first_curbe_grf_2; > - > - ps._8PixelDispatchEnable = prog_data->dispatch_8; > - ps._16PixelDispatchEnable = prog_data->dispatch_16; > - > - ps.KernelStartPointer0 = params->wm_prog_kernel; > - ps.KernelStartPointer2 = > - params->wm_prog_kernel + prog_data->ksp_offset_2; > - > - /* 3DSTATE_PS expects the number of threads per PSD, which is always > 64; > - * it implicitly scales for different GT levels (which have some # of > - * PSDs). > - * > - * In Gen8 the format is U8-2 whereas in Gen9 it is U8-1. > - */ > - if (GEN_GEN >= 9) > - ps.MaximumNumberofThreadsPerPSD = 64 - 1; > - else > - ps.MaximumNumberofThreadsPerPSD = 64 - 2; > - > - switch (params->fast_clear_op) { > -#if GEN_GEN >= 9 > - case (1 << 6): /* GEN7_PS_RENDER_TARGET_RESOLVE_ENABLE */ > - ps.RenderTargetResolveType = RESOLVE_PARTIAL; > - break; > - case (3 << 6): /* GEN9_PS_RENDER_TARGET_RESOLVE_FULL */ > - ps.RenderTargetResolveType = RESOLVE_FULL; > - break; > -#else > - case (1 << 6): /* GEN7_PS_RENDER_TARGET_RESOLVE_ENABLE */ > - ps.RenderTargetResolveEnable = true; > - break; > -#endif > - case (1 << 8): /* GEN7_PS_RENDER_TARGET_FAST_CLEAR_ENABLE */ > - ps.RenderTargetFastClearEnable = true; > - break; > - } > - } > - > - blorp_emit(batch, GENX(3DSTATE_PS_EXTRA), psx) { > - psx.PixelShaderValid = true; > - > - if (params->src.addr.buffer) > - psx.PixelShaderKillsPixel = true; > - > - psx.AttributeEnable = prog_data->num_varying_inputs > 0; > - > - if (prog_data && prog_data->persample_msaa_dispatch) > - psx.PixelShaderIsPerSample = true; > - } > - > -#elif GEN_GEN >= 7 > - > - blorp_emit(batch, GENX(3DSTATE_WM), wm) { > - switch (params->hiz_op) { > - case GEN6_HIZ_OP_DEPTH_CLEAR: > - wm.DepthBufferClear = true; > - break; > - case GEN6_HIZ_OP_DEPTH_RESOLVE: > - wm.DepthBufferResolveEnable = true; > - break; > - case GEN6_HIZ_OP_HIZ_RESOLVE: > - wm.HierarchicalDepthBufferResolveEnable = true; > - break; > - case GEN6_HIZ_OP_NONE: > - break; > - default: > - unreachable("not reached"); > - } > - > - if (prog_data) > - wm.ThreadDispatchEnable = true; > - > - if (params->src.addr.buffer) > - wm.PixelShaderKillPixel = true; > - > - if (params->dst.surf.samples > 1) { > - wm.MultisampleRasterizationMode = MSRASTMODE_ON_PATTERN; > - wm.MultisampleDispatchMode = > - (prog_data && prog_data->persample_msaa_dispatch) ? > - MSDISPMODE_PERSAMPLE : MSDISPMODE_PERPIXEL; > - } else { > - wm.MultisampleRasterizationMode = MSRASTMODE_OFF_PIXEL; > - wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE; > - } > - } > - > - blorp_emit(batch, GENX(3DSTATE_PS), ps) { > - ps.MaximumNumberofThreads = batch.blorp->isl_dev->info->max_wm_threads > - 1; > - > -#if GEN_IS_HASWELL > - ps.SampleMask = 1; > -#endif > - > - if (prog_data) { > - ps.DispatchGRFStartRegisterforConstantSetupData0 = > - prog_data->first_curbe_grf_0; > - ps.DispatchGRFStartRegisterforConstantSetupData2 = > - prog_data->first_curbe_grf_2; > - > - ps.KernelStartPointer0 = params->wm_prog_kernel; > - ps.KernelStartPointer2 = > - params->wm_prog_kernel + prog_data->ksp_offset_2; > - > - ps._8PixelDispatchEnable = prog_data->dispatch_8; > - ps._16PixelDispatchEnable = prog_data->dispatch_16; > - > - ps.AttributeEnable = prog_data->num_varying_inputs > 0; > - } else { > - /* Gen7 hardware gets angry if we don't enable at least one dispatch > - * mode, so just enable 16-pixel dispatch if we don't have a > program. > - */ > - ps._16PixelDispatchEnable = true; > - } > - > - if (params->src.addr.buffer) > - ps.SamplerCount = 1; /* Up to 4 samplers */ > - > - switch (params->fast_clear_op) { > - case (1 << 6): /* GEN7_PS_RENDER_TARGET_RESOLVE_ENABLE */ > - ps.RenderTargetResolveEnable = true; > - break; > - case (1 << 8): /* GEN7_PS_RENDER_TARGET_FAST_CLEAR_ENABLE */ > - ps.RenderTargetFastClearEnable = true; > - break; > - } > - } > - > -#else /* GEN_GEN <= 6 */ > - > - blorp_emit(batch, GENX(3DSTATE_WM), wm) { > - wm.MaximumNumberofThreads = batch.blorp->isl_dev->info->max_wm_threads > - 1; > - > - switch (params->hiz_op) { > - case GEN6_HIZ_OP_DEPTH_CLEAR: > - wm.DepthBufferClear = true; > - break; > - case GEN6_HIZ_OP_DEPTH_RESOLVE: > - wm.DepthBufferResolveEnable = true; > - break; > - case GEN6_HIZ_OP_HIZ_RESOLVE: > - wm.HierarchicalDepthBufferResolveEnable = true; > - break; > - case GEN6_HIZ_OP_NONE: > - break; > - default: > - unreachable("not reached"); > - } > - > - if (prog_data) { > - wm.ThreadDispatchEnable = true; > - > - wm.DispatchGRFStartRegisterforConstantSetupData0 = > - prog_data->first_curbe_grf_0; > - wm.DispatchGRFStartRegisterforConstantSetupData2 = > - prog_data->first_curbe_grf_2; > - > - wm.KernelStartPointer0 = params->wm_prog_kernel; > - wm.KernelStartPointer2 = > - params->wm_prog_kernel + prog_data->ksp_offset_2; > - > - wm._8PixelDispatchEnable = prog_data->dispatch_8; > - wm._16PixelDispatchEnable = prog_data->dispatch_16; > - > - wm.NumberofSFOutputAttributes = prog_data->num_varying_inputs; > - } > - > - if (params->src.addr.buffer) { > - wm.SamplerCount = 1; /* Up to 4 samplers */ > - wm.PixelShaderKillPixel = true; /* TODO: temporarily smash on */ > - } > - > - if (params->dst.surf.samples > 1) { > - wm.MultisampleRasterizationMode = MSRASTMODE_ON_PATTERN; > - wm.MultisampleDispatchMode = > - (prog_data && prog_data->persample_msaa_dispatch) ? > - MSDISPMODE_PERSAMPLE : MSDISPMODE_PERPIXEL; > - } else { > - wm.MultisampleRasterizationMode = MSRASTMODE_OFF_PIXEL; > - wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE; > - } > - } > - > -#endif /* GEN_GEN */ > -} > - > - > -static void > -blorp_emit_depth_stencil_config(struct blorp_batch batch, > - const struct brw_blorp_params *params) > -{ > -#if GEN_GEN >= 7 > - const uint32_t mocs = 1; /* GEN7_MOCS_L3 */ > -#else > - const uint32_t mocs = 0; > -#endif > - > - blorp_emit(batch, GENX(3DSTATE_DEPTH_BUFFER), db) { > - switch (params->depth.surf.dim) { > - case ISL_SURF_DIM_1D: > - db.SurfaceType = SURFTYPE_1D; > - break; > - case ISL_SURF_DIM_2D: > - db.SurfaceType = SURFTYPE_2D; > - break; > - case ISL_SURF_DIM_3D: > - db.SurfaceType = SURFTYPE_3D; > - break; > - } > - > - db.SurfaceFormat = params->depth_format; > - > -#if GEN_GEN >= 7 > - db.DepthWriteEnable = true; > -#endif > - > -#if GEN_GEN <= 6 > - db.TiledSurface = true; > - db.TileWalk = TILEWALK_YMAJOR; > - db.MIPMapLayoutMode = MIPLAYOUT_BELOW; > - db.SeparateStencilBufferEnable = true; > -#endif > - > - db.HierarchicalDepthBufferEnable = true; > - > - db.Width = params->depth.surf.logical_level0_px.width - 1; > - db.Height = params->depth.surf.logical_level0_px.height - 1; > - db.RenderTargetViewExtent = db.Depth = > - MAX2(params->depth.surf.logical_level0_px.depth, > - params->depth.surf.logical_level0_px.array_len) - 1; > - > - db.LOD = params->depth.view.base_level; > - db.MinimumArrayElement = params->depth.view.base_array_layer; > - > - db.SurfacePitch = params->depth.surf.row_pitch - 1; > - db.SurfaceBaseAddress = params->depth.addr; > - db.DepthBufferMOCS = mocs; > - } > - > - blorp_emit(batch, GENX(3DSTATE_HIER_DEPTH_BUFFER), hiz) { > - hiz.SurfacePitch = params->depth.aux_surf.row_pitch - 1; > - hiz.SurfaceBaseAddress = params->depth.aux_addr; > - hiz.HierarchicalDepthBufferMOCS = mocs; > - } > - > - blorp_emit(batch, GENX(3DSTATE_STENCIL_BUFFER), sb); > -} > - > -static uint32_t > -blorp_emit_blend_state(struct blorp_batch batch, > - const struct brw_blorp_params *params) > -{ > - struct GENX(BLEND_STATE) blend; > - memset(&blend, 0, sizeof(blend)); > - > - for (unsigned i = 0; i < params->num_draw_buffers; ++i) { > - blend.Entry[i].PreBlendColorClampEnable = true; > - blend.Entry[i].PostBlendColorClampEnable = true; > - blend.Entry[i].ColorClampRange = COLORCLAMP_RTFORMAT; > - > - blend.Entry[i].WriteDisableRed = params->color_write_disable[0]; > - blend.Entry[i].WriteDisableGreen = params->color_write_disable[1]; > - blend.Entry[i].WriteDisableBlue = params->color_write_disable[2]; > - blend.Entry[i].WriteDisableAlpha = params->color_write_disable[3]; > - } > - > - uint32_t offset; > - void *state = blorp_alloc_dynamic_state(batch.blorp, > - AUB_TRACE_BLEND_STATE, > - GENX(BLEND_STATE_length) * 4, > - 64, &offset); > - GENX(BLEND_STATE_pack)(NULL, state, &blend); > - > -#if GEN_GEN >= 7 > - blorp_emit(batch, GENX(3DSTATE_BLEND_STATE_POINTERS), sp) { > - sp.BlendStatePointer = offset; > -#if GEN_GEN >= 8 > - sp.BlendStatePointerValid = true; > -#endif > - } > -#endif > - > -#if GEN_GEN >= 8 > - blorp_emit(batch, GENX(3DSTATE_PS_BLEND), ps_blend) { > - ps_blend.HasWriteableRT = true; > - } > -#endif > - > - return offset; > -} > - > -static uint32_t > -blorp_emit_color_calc_state(struct blorp_batch batch, > - const struct brw_blorp_params *params) > -{ > - uint32_t offset; > - void *state = blorp_alloc_dynamic_state(batch.blorp, > - AUB_TRACE_CC_STATE, > - GENX(COLOR_CALC_STATE_length) * 4, > - 64, &offset); > - memset(state, 0, GENX(COLOR_CALC_STATE_length) * 4); > - > -#if GEN_GEN >= 7 > - blorp_emit(batch, GENX(3DSTATE_CC_STATE_POINTERS), sp) { > - sp.ColorCalcStatePointer = offset; > -#if GEN_GEN >= 8 > - sp.ColorCalcStatePointerValid = true; > -#endif > - } > -#endif > - > - return offset; > -} > - > -static uint32_t > -blorp_emit_depth_stencil_state(struct blorp_batch batch, > - const struct brw_blorp_params *params) > -{ > -#if GEN_GEN >= 8 > - > - /* On gen8+, DEPTH_STENCIL state is simply an instruction */ > - blorp_emit(batch, GENX(3DSTATE_WM_DEPTH_STENCIL), ds); > - return 0; > - > -#else /* GEN_GEN <= 7 */ > - > - /* See the following sections of the Sandy Bridge PRM, Volume 1, Part2: > - * - 7.5.3.1 Depth Buffer Clear > - * - 7.5.3.2 Depth Buffer Resolve > - * - 7.5.3.3 Hierarchical Depth Buffer Resolve > - */ > - struct GENX(DEPTH_STENCIL_STATE) ds = { > - .DepthBufferWriteEnable = true, > - }; > - > - if (params->hiz_op == GEN6_HIZ_OP_DEPTH_RESOLVE) { > - ds.DepthTestEnable = true; > - ds.DepthTestFunction = COMPAREFUNCTION_NEVER; > - } > - > - uint32_t offset; > - void *state = blorp_alloc_dynamic_state(batch.blorp, > - AUB_TRACE_DEPTH_STENCIL_STATE, > - GENX(DEPTH_STENCIL_STATE_length) > * 4, > - 64, &offset); > - GENX(DEPTH_STENCIL_STATE_pack)(NULL, state, &ds); > - > -#if GEN_GEN >= 7 > - blorp_emit(batch, GENX(3DSTATE_DEPTH_STENCIL_STATE_POINTERS), sp) { > - sp.PointertoDEPTH_STENCIL_STATE = offset; > - } > -#endif > - > - return offset; > - > -#endif /* GEN_GEN */ > -} > - > -struct surface_state_info { > - unsigned num_dwords; > - unsigned ss_align; /* Required alignment of RENDER_SURFACE_STATE in bytes > */ > - unsigned reloc_dw; > - unsigned aux_reloc_dw; > -}; > - > -static const struct surface_state_info surface_state_infos[] = { > - [6] = {6, 32, 1, 0}, > - [7] = {8, 32, 1, 6}, > - [8] = {13, 64, 8, 10}, > - [9] = {16, 64, 8, 10}, > -}; > - > -static void > -blorp_emit_surface_state(struct blorp_context *blorp, > - const struct brw_blorp_surface_info *surface, > - uint32_t *state, uint32_t state_offset, > - bool is_render_target) > -{ > - const struct surface_state_info ss_info = surface_state_infos[GEN_GEN]; > - > - struct isl_surf surf = surface->surf; > - > - if (surf.dim == ISL_SURF_DIM_1D && > - surf.dim_layout == ISL_DIM_LAYOUT_GEN4_2D) { > - assert(surf.logical_level0_px.height == 1); > - surf.dim = ISL_SURF_DIM_2D; > - } > - > - /* Blorp doesn't support HiZ in any of the blit or slow-clear paths */ > - enum isl_aux_usage aux_usage = surface->aux_usage; > - if (aux_usage == ISL_AUX_USAGE_HIZ) > - aux_usage = ISL_AUX_USAGE_NONE; > - > - const uint32_t mocs = is_render_target ? blorp->mocs.rb : blorp->mocs.tex; > - > - isl_surf_fill_state(blorp->isl_dev, state, > - .surf = &surf, .view = &surface->view, > - .aux_surf = &surface->aux_surf, .aux_usage = > aux_usage, > - .mocs = mocs, .clear_color = surface->clear_color, > - .x_offset_sa = surface->tile_x_sa, > - .y_offset_sa = surface->tile_y_sa); > - > - blorp_surface_reloc(blorp, state_offset + ss_info.reloc_dw * 4, > - surface->addr, 0); > - > - if (aux_usage != ISL_AUX_USAGE_NONE) { > - /* On gen7 and prior, the bottom 12 bits of the MCS base address are > - * used to store other information. This should be ok, however, > because > - * surface buffer addresses are always 4K page alinged. > - */ > - assert((surface->aux_addr.offset & 0xfff) == 0); > - blorp_surface_reloc(blorp, state_offset + ss_info.aux_reloc_dw * 4, > - surface->aux_addr, state[ss_info.aux_reloc_dw]); > - } > -} > - > -static void > -blorp_emit_surface_states(struct blorp_batch batch, > - const struct brw_blorp_params *params) > -{ > - uint32_t bind_offset, *bind_map; > - void *surface_maps[2]; > - > - const unsigned ss_size = GENX(RENDER_SURFACE_STATE_length) * 4; > - const unsigned ss_align = GENX(RENDER_SURFACE_STATE_length) > 8 ? 64 : 32; > - > - unsigned num_surfaces = 1 + (params->src.addr.buffer != NULL); > - blorp_alloc_binding_table(batch.blorp, num_surfaces, ss_size, ss_align, > - &bind_offset, &bind_map, surface_maps); > - > - blorp_emit_surface_state(batch.blorp, ¶ms->dst, > - surface_maps[BLORP_RENDERBUFFER_BT_INDEX], > - bind_map[BLORP_RENDERBUFFER_BT_INDEX], true); > - if (params->src.addr.buffer) { > - blorp_emit_surface_state(batch.blorp, ¶ms->src, > - surface_maps[BLORP_TEXTURE_BT_INDEX], > - bind_map[BLORP_TEXTURE_BT_INDEX], false); > - } > - > -#if GEN_GEN >= 7 > - blorp_emit(batch, GENX(3DSTATE_BINDING_TABLE_POINTERS_PS), bt) { > - bt.PointertoPSBindingTable = bind_offset; > - } > -#else > - blorp_emit(batch, GENX(3DSTATE_BINDING_TABLE_POINTERS), bt) { > - bt.PSBindingTableChange = true; > - bt.PointertoPSBindingTable = bind_offset; > - } > -#endif > -} > - > -static void > -blorp_emit_sampler_state(struct blorp_batch batch, > - const struct brw_blorp_params *params) > -{ > - struct GENX(SAMPLER_STATE) sampler = { > - .MipModeFilter = MIPFILTER_NONE, > - .MagModeFilter = MAPFILTER_LINEAR, > - .MinModeFilter = MAPFILTER_LINEAR, > - .MinLOD = 0, > - .MaxLOD = 0, > - .TCXAddressControlMode = TCM_CLAMP, > - .TCYAddressControlMode = TCM_CLAMP, > - .TCZAddressControlMode = TCM_CLAMP, > - .MaximumAnisotropy = RATIO21, > - .RAddressMinFilterRoundingEnable = true, > - .RAddressMagFilterRoundingEnable = true, > - .VAddressMinFilterRoundingEnable = true, > - .VAddressMagFilterRoundingEnable = true, > - .UAddressMinFilterRoundingEnable = true, > - .UAddressMagFilterRoundingEnable = true, > - .NonnormalizedCoordinateEnable = true, > - }; > - > - uint32_t offset; > - void *state = blorp_alloc_dynamic_state(batch.blorp, > - AUB_TRACE_SAMPLER_STATE, > - GENX(SAMPLER_STATE_length) * 4, > - 32, &offset); > - GENX(SAMPLER_STATE_pack)(NULL, state, &sampler); > - > -#if GEN_GEN >= 7 > - blorp_emit(batch, GENX(3DSTATE_SAMPLER_STATE_POINTERS_PS), ssp) { > - ssp.PointertoPSSamplerState = offset; > - } > -#else > - blorp_emit(batch, GENX(3DSTATE_SAMPLER_STATE_POINTERS), ssp) { > - ssp.VSSamplerStateChange = true; > - ssp.GSSamplerStateChange = true; > - ssp.PSSamplerStateChange = true; > - ssp.PointertoPSSamplerState = offset; > - } > -#endif > -} > - > -/* 3DSTATE_VIEWPORT_STATE_POINTERS */ > -static void > -blorp_emit_viewport_state(struct blorp_batch batch, > - const struct brw_blorp_params *params) > -{ > - uint32_t cc_vp_offset; > - > - void *state = blorp_alloc_dynamic_state(batch.blorp, > - AUB_TRACE_CC_VP_STATE, > - GENX(CC_VIEWPORT_length) * 4, 32, > - &cc_vp_offset); > - > - GENX(CC_VIEWPORT_pack)(&batch, state, > - &(struct GENX(CC_VIEWPORT)) { > - .MinimumDepth = 0.0, > - .MaximumDepth = 1.0, > - }); > - > -#if GEN_GEN >= 7 > - blorp_emit(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), vsp) { > - vsp.CCViewportPointer = cc_vp_offset; > - } > -#else > - blorp_emit(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS), vsp) { > - vsp.CCViewportStateChange = true; > - vsp.PointertoCC_VIEWPORT = cc_vp_offset; > - } > -#endif > -} > - > - > -/** > - * \brief Execute a blit or render pass operation. > - * > - * To execute the operation, this function manually constructs and emits a > - * batch to draw a rectangle primitive. The batchbuffer is flushed before > - * constructing and after emitting the batch. > - * > - * This function alters no GL state. > - */ > -static void > -blorp_exec(struct blorp_context *blorp, void *batch_data, > - const struct brw_blorp_params *params) > -{ > - struct blorp_batch batch = { > - .blorp = blorp, > - .batch = batch_data, > - }; > - > - uint32_t blend_state_offset = 0; > - uint32_t color_calc_state_offset = 0; > - uint32_t depth_stencil_state_offset; > - > - blorp_emit_vertex_buffers(batch, params); > - blorp_emit_vertex_elements(batch, params); > - > - emit_urb_config(batch, params); > - > - if (params->wm_prog_data) { > - blend_state_offset = blorp_emit_blend_state(batch, params); > - color_calc_state_offset = blorp_emit_color_calc_state(batch, params); > - } > - depth_stencil_state_offset = blorp_emit_depth_stencil_state(batch, > params); > - > -#if GEN_GEN <= 6 > - /* 3DSTATE_CC_STATE_POINTERS > - * > - * The pointer offsets are relative to > - * CMD_STATE_BASE_ADDRESS.DynamicStateBaseAddress. > - * > - * The HiZ op doesn't use BLEND_STATE or COLOR_CALC_STATE. Here also. > - * > - * The dynamic state emit helpers emit their own STATE_POINTERS packets on > - * gen7+. However, on gen6 and earlier, they're all lumpped together in > - * one CC_STATE_POINTERS packet so we have to emit that here. > - */ > - blorp_emit(batch, GENX(3DSTATE_CC_STATE_POINTERS), cc) { > - cc.BLEND_STATEChange = true; > - cc.COLOR_CALC_STATEChange = true; > - cc.DEPTH_STENCIL_STATEChange = true; > - cc.PointertoBLEND_STATE = blend_state_offset; > - cc.PointertoCOLOR_CALC_STATE = color_calc_state_offset; > - cc.PointertoDEPTH_STENCIL_STATE = depth_stencil_state_offset; > - } > -#else > - (void)blend_state_offset; > - (void)color_calc_state_offset; > - (void)depth_stencil_state_offset; > -#endif > - > - blorp_emit(batch, GENX(3DSTATE_CONSTANT_VS), vs); > -#if GEN_GEN >= 7 > - blorp_emit(batch, GENX(3DSTATE_CONSTANT_HS), hs); > - blorp_emit(batch, GENX(3DSTATE_CONSTANT_DS), DS); > -#endif > - blorp_emit(batch, GENX(3DSTATE_CONSTANT_GS), gs); > - blorp_emit(batch, GENX(3DSTATE_CONSTANT_PS), ps); > - > - if (params->wm_prog_data) > - blorp_emit_surface_states(batch, params); > - > - if (params->src.addr.buffer) > - blorp_emit_sampler_state(batch, params); > - > - blorp_emit_3dstate_multisample(batch.blorp, batch.batch, > - params->dst.surf.samples); > - > - blorp_emit(batch, GENX(3DSTATE_SAMPLE_MASK), mask) { > - mask.SampleMask = (1 << params->dst.surf.samples) - 1; > - } > - > - /* From the BSpec, 3D Pipeline > Geometry > Vertex Shader > State, > - * 3DSTATE_VS, Dword 5.0 "VS Function Enable": > - * > - * [DevSNB] A pipeline flush must be programmed prior to a > - * 3DSTATE_VS command that causes the VS Function Enable to > - * toggle. Pipeline flush can be executed by sending a PIPE_CONTROL > - * command with CS stall bit set and a post sync operation. > - * > - * We've already done one at the start of the BLORP operation. And here. > - */ > - blorp_emit(batch, GENX(3DSTATE_VS), vs); > -#if GEN_GEN >= 7 > - blorp_emit(batch, GENX(3DSTATE_HS), hs); > - blorp_emit(batch, GENX(3DSTATE_TE), te); > - blorp_emit(batch, GENX(3DSTATE_DS), DS); > - blorp_emit(batch, GENX(3DSTATE_STREAMOUT), so); > -#endif > - blorp_emit(batch, GENX(3DSTATE_GS), gs); > - > - blorp_emit(batch, GENX(3DSTATE_CLIP), clip) { > - clip.PerspectiveDivideDisable = true; > - } > - > - blorp_emit_sf_config(batch, params); > - blorp_emit_ps_config(batch, params); > - > - blorp_emit_viewport_state(batch, params); > - > - if (params->depth.addr.buffer) { > - blorp_emit_depth_stencil_config(batch, params); > - } else { > - blorp_emit(batch, GENX(3DSTATE_DEPTH_BUFFER), db) { > - db.SurfaceType = SURFTYPE_NULL; > - db.SurfaceFormat = D32_FLOAT; > - } > - blorp_emit(batch, GENX(3DSTATE_HIER_DEPTH_BUFFER), hiz); > - blorp_emit(batch, GENX(3DSTATE_STENCIL_BUFFER), sb); > - } > - > - /* 3DSTATE_CLEAR_PARAMS > - * > - * From the Sandybridge PRM, Volume 2, Part 1, Section > 3DSTATE_CLEAR_PARAMS: > - * [DevSNB] 3DSTATE_CLEAR_PARAMS packet must follow the > DEPTH_BUFFER_STATE > - * packet when HiZ is enabled and the DEPTH_BUFFER_STATE changes. And finally here. Otherwise it looks that code is just moved. I would keep the documentation, and with that: Reviewed-by: Topi Pohjolainen <topi.pohjolai...@intel.com> > - */ > - blorp_emit(batch, GENX(3DSTATE_CLEAR_PARAMS), clear) { > - clear.DepthClearValueValid = true; > - clear.DepthClearValue = params->depth.clear_color.u32[0]; > - } > - > - blorp_emit(batch, GENX(3DSTATE_DRAWING_RECTANGLE), rect) { > - rect.ClippedDrawingRectangleXMax = MAX2(params->x1, params->x0) - 1; > - rect.ClippedDrawingRectangleYMax = MAX2(params->y1, params->y0) - 1; > - } > - > - blorp_emit(batch, GENX(3DPRIMITIVE), prim) { > - prim.VertexAccessType = SEQUENTIAL; > - prim.PrimitiveTopologyType = _3DPRIM_RECTLIST; > - prim.VertexCountPerInstance = 3; > - prim.InstanceCount = params->num_layers; > - } > -} > - > void > genX(blorp_exec)(struct brw_context *brw, > const struct brw_blorp_params *params) > diff --git a/src/mesa/drivers/dri/i965/genX_blorp_exec.h > b/src/mesa/drivers/dri/i965/genX_blorp_exec.h > new file mode 100644 > index 0000000..02a0397 > --- /dev/null > +++ b/src/mesa/drivers/dri/i965/genX_blorp_exec.h > @@ -0,0 +1,1121 @@ > +/* > + * Copyright © 2016 Intel Corporation > + * > + * Permission is hereby granted, free of charge, to any person obtaining a > + * copy of this software and associated documentation files (the "Software"), > + * to deal in the Software without restriction, including without limitation > + * the rights to use, copy, modify, merge, publish, distribute, sublicense, > + * and/or sell copies of the Software, and to permit persons to whom the > + * Software is furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this permission notice (including the next > + * paragraph) shall be included in all copies or substantial portions of the > + * Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER > DEALINGS > + * IN THE SOFTWARE. > + */ > + > +#include "blorp_priv.h" > +#include "brw_device_info.h" > +#include "intel_aub.h" > + > +/** > + * This file provides the blorp pipeline setup and execution functionality. > + * It defines the following function: > + * > + * static void > + * blorp_exec(struct blorp_context *blorp, void *batch_data, > + * const struct brw_blorp_params *params); > + * > + * It is the job of whoever includes this header to wrap this in something > + * to get an externally visible symbol. > + * > + * In order for the blorp_exec function to work, the driver must provide > + * implementations of the following static helper functions. > + */ > + > +static void * > +blorp_emit_dwords(struct blorp_context *blorp, void *batch, unsigned n); > + > +static uint64_t > +blorp_emit_reloc(struct blorp_context *blorp, void *batch, > + void *location, struct blorp_address address, uint32_t > delta); > + > +static void * > +blorp_alloc_dynamic_state(struct blorp_context *blorp, > + enum aub_state_struct_type type, > + uint32_t size, > + uint32_t alignment, > + uint32_t *offset); > +static void * > +blorp_alloc_vertex_buffer(struct blorp_context *blorp, uint32_t size, > + struct blorp_address *addr); > + > +static void > +blorp_alloc_binding_table(struct blorp_context *blorp, unsigned num_entries, > + unsigned state_size, unsigned state_alignment, > + uint32_t *bt_offset, uint32_t **bt_map, > + void **surface_maps); > +static void > +blorp_surface_reloc(struct blorp_context *blorp, uint32_t ss_offset, > + struct blorp_address address, uint32_t delta); > + > +static void > +blorp_emit_urb_config(struct blorp_context *blorp, void *batch, > + unsigned vs_entry_size); > +static void > +blorp_emit_3dstate_multisample(struct blorp_context *blorp, void *batch, > + unsigned samples); > + > +/***** BEGIN blorp_exec implementation ******/ > + > +#include "genxml/gen_macros.h" > + > +struct blorp_batch { > + struct blorp_context *blorp; > + void *batch; > +}; > + > +#define __gen_address_type struct blorp_address > +#define __gen_user_data struct blorp_batch > + > +static uint64_t > +__gen_combine_address(struct blorp_batch *batch, void *location, > + struct blorp_address address, uint32_t delta) > +{ > + if (address.buffer == NULL) { > + return address.offset + delta; > + } else { > + return blorp_emit_reloc(batch->blorp, batch->batch, > + location, address, delta); > + } > +} > + > +#include "genxml/genX_pack.h" > + > +#define _blorp_cmd_length(cmd) cmd ## _length > +#define _blorp_cmd_length_bias(cmd) cmd ## _length_bias > +#define _blorp_cmd_header(cmd) cmd ## _header > +#define _blorp_cmd_pack(cmd) cmd ## _pack > + > +#define blorp_emit(batch, cmd, name) \ > + for (struct cmd name = { _blorp_cmd_header(cmd) }, \ > + *_dst = blorp_emit_dwords(batch.blorp, batch.batch, \ > + _blorp_cmd_length(cmd)); \ > + __builtin_expect(_dst != NULL, 1); \ > + _blorp_cmd_pack(cmd)(&batch, (void *)_dst, &name), \ > + _dst = NULL) > + > +#define blorp_emitn(batch, cmd, n) ({ \ > + uint32_t *_dw = blorp_emit_dwords(batch.blorp, batch.batch, n); \ > + struct cmd template = { \ > + _blorp_cmd_header(cmd), \ > + .DWordLength = n - _blorp_cmd_length_bias(cmd), \ > + }; \ > + _blorp_cmd_pack(cmd)(&batch, _dw, &template); \ > + _dw + 1; /* Array starts at dw[1] */ \ > + }) > + > +/* Once vertex fetcher has written full VUE entries with complete > + * header the space requirement is as follows per vertex (in bytes): > + * > + * Header Position Program constants > + * +--------+------------+-------------------+ > + * | 16 | 16 | n x 16 | > + * +--------+------------+-------------------+ > + * > + * where 'n' stands for number of varying inputs expressed as vec4s. > + * > + * The URB size is in turn expressed in 64 bytes (512 bits). > + */ > +static inline unsigned > +gen7_blorp_get_vs_entry_size(const struct brw_blorp_params *params) > +{ > + const unsigned num_varyings = > + params->wm_prog_data ? params->wm_prog_data->num_varying_inputs : 0; > + const unsigned total_needed = 16 + 16 + num_varyings * 16; > + > + return DIV_ROUND_UP(total_needed, 64); > +} > + > +/* 3DSTATE_URB_VS > + * 3DSTATE_URB_HS > + * 3DSTATE_URB_DS > + * 3DSTATE_URB_GS > + * > + * If the 3DSTATE_URB_VS is emitted, than the others must be also. > + * From the Ivybridge PRM, Volume 2 Part 1, section 1.7.1 3DSTATE_URB_VS: > + * > + * 3DSTATE_URB_HS, 3DSTATE_URB_DS, and 3DSTATE_URB_GS must also be > + * programmed in order for the programming of this state to be > + * valid. > + */ > +static void > +emit_urb_config(struct blorp_batch batch, > + const struct brw_blorp_params *params) > +{ > + blorp_emit_urb_config(batch.blorp, batch.batch, > + gen7_blorp_get_vs_entry_size(params)); > +} > + > +static void > +blorp_emit_vertex_data(struct blorp_batch batch, > + const struct brw_blorp_params *params, > + struct blorp_address *addr, > + uint32_t *size) > +{ > + const float vertices[] = { > + /* v0 */ (float)params->x0, (float)params->y1, > + /* v1 */ (float)params->x1, (float)params->y1, > + /* v2 */ (float)params->x0, (float)params->y0, > + }; > + > + void *data = blorp_alloc_vertex_buffer(batch.blorp, sizeof(vertices), > addr); > + memcpy(data, vertices, sizeof(vertices)); > + *size = sizeof(vertices); > +} > + > +static void > +blorp_emit_input_varying_data(struct blorp_batch batch, > + const struct brw_blorp_params *params, > + struct blorp_address *addr, > + uint32_t *size) > +{ > + const unsigned vec4_size_in_bytes = 4 * sizeof(float); > + const unsigned max_num_varyings = > + DIV_ROUND_UP(sizeof(params->wm_inputs), vec4_size_in_bytes); > + const unsigned num_varyings = params->wm_prog_data->num_varying_inputs; > + > + *size = num_varyings * vec4_size_in_bytes; > + > + const float *const inputs_src = (const float *)¶ms->wm_inputs; > + float *inputs = blorp_alloc_vertex_buffer(batch.blorp, *size, addr); > + > + /* Walk over the attribute slots, determine if the attribute is used by > + * the program and when necessary copy the values from the input storage > to > + * the vertex data buffer. > + */ > + for (unsigned i = 0; i < max_num_varyings; i++) { > + const gl_varying_slot attr = VARYING_SLOT_VAR0 + i; > + > + if (!(params->wm_prog_data->inputs_read & BITFIELD64_BIT(attr))) > + continue; > + > + memcpy(inputs, inputs_src + i * 4, vec4_size_in_bytes); > + > + inputs += 4; > + } > +} > + > +static void > +blorp_emit_vertex_buffers(struct blorp_batch batch, > + const struct brw_blorp_params *params) > +{ > + struct GENX(VERTEX_BUFFER_STATE) vb[2]; > + memset(vb, 0, sizeof(vb)); > + > + unsigned num_buffers = 1; > + > + uint32_t size; > + blorp_emit_vertex_data(batch, params, &vb[0].BufferStartingAddress, > &size); > + vb[0].VertexBufferIndex = 0; > + vb[0].BufferPitch = 2 * sizeof(float); > + vb[0].VertexBufferMOCS = batch.blorp->mocs.vb; > +#if GEN_GEN >= 7 > + vb[0].AddressModifyEnable = true; > +#endif > +#if GEN_GEN >= 8 > + vb[0].BufferSize = size; > +#else > + vb[0].BufferAccessType = VERTEXDATA; > + vb[0].EndAddress = vb[0].BufferStartingAddress; > + vb[0].EndAddress.offset += size - 1; > +#endif > + > + if (params->wm_prog_data && params->wm_prog_data->num_varying_inputs) { > + blorp_emit_input_varying_data(batch, params, > + &vb[1].BufferStartingAddress, &size); > + vb[1].VertexBufferIndex = 1; > + vb[1].BufferPitch = 0; > + vb[1].VertexBufferMOCS = batch.blorp->mocs.vb; > +#if GEN_GEN >= 7 > + vb[1].AddressModifyEnable = true; > +#endif > +#if GEN_GEN >= 8 > + vb[1].BufferSize = size; > +#else > + vb[1].BufferAccessType = INSTANCEDATA; > + vb[1].EndAddress = vb[1].BufferStartingAddress; > + vb[1].EndAddress.offset += size - 1; > +#endif > + num_buffers++; > + } > + > + const unsigned num_dwords = > + 1 + GENX(VERTEX_BUFFER_STATE_length) * num_buffers; > + uint32_t *dw = blorp_emitn(batch, GENX(3DSTATE_VERTEX_BUFFERS), > num_dwords); > + > + for (unsigned i = 0; i < num_buffers; i++) { > + GENX(VERTEX_BUFFER_STATE_pack)(&batch, dw, &vb[i]); > + dw += GENX(VERTEX_BUFFER_STATE_length); > + } > +} > + > +static void > +blorp_emit_vertex_elements(struct blorp_batch batch, > + const struct brw_blorp_params *params) > +{ > + const unsigned num_varyings = > + params->wm_prog_data ? params->wm_prog_data->num_varying_inputs : 0; > + const unsigned num_elements = 2 + num_varyings; > + > + struct GENX(VERTEX_ELEMENT_STATE) ve[num_elements]; > + memset(ve, 0, num_elements * sizeof(*ve)); > + > + /* Setup VBO for the rectangle primitive.. > + * > + * A rectangle primitive (3DPRIM_RECTLIST) consists of only three > + * vertices. The vertices reside in screen space with DirectX > + * coordinates (that is, (0, 0) is the upper left corner). > + * > + * v2 ------ implied > + * | | > + * | | > + * v0 ----- v1 > + * > + * Since the VS is disabled, the clipper loads each VUE directly from > + * the URB. This is controlled by the 3DSTATE_VERTEX_BUFFERS and > + * 3DSTATE_VERTEX_ELEMENTS packets below. The VUE contents are as follows: > + * dw0: Reserved, MBZ. > + * dw1: Render Target Array Index. The HiZ op does not use indexed > + * vertices, so set the dword to 0. > + * dw2: Viewport Index. The HiZ op disables viewport mapping and > + * scissoring, so set the dword to 0. > + * dw3: Point Width: The HiZ op does not emit the POINTLIST primitive, > + * so set the dword to 0. > + * dw4: Vertex Position X. > + * dw5: Vertex Position Y. > + * dw6: Vertex Position Z. > + * dw7: Vertex Position W. > + * > + * dw8: Flat vertex input 0 > + * dw9: Flat vertex input 1 > + * ... > + * dwn: Flat vertex input n - 8 > + * > + * For details, see the Sandybridge PRM, Volume 2, Part 1, Section 1.5.1 > + * "Vertex URB Entry (VUE) Formats". > + * > + * Only vertex position X and Y are going to be variable, Z is fixed to > + * zero and W to one. Header words dw0-3 are all zero. There is no need to > + * include the fixed values in the vertex buffer. Vertex fetcher can be > + * instructed to fill vertex elements with constant values of one and zero > + * instead of reading them from the buffer. > + * Flat inputs are program constants that are not interpolated. Moreover > + * their values will be the same between vertices. > + * > + * See the vertex element setup below. > + */ > + ve[0].VertexBufferIndex = 0; > + ve[0].Valid = true; > + ve[0].SourceElementFormat = ISL_FORMAT_R32G32B32A32_FLOAT; > + ve[0].SourceElementOffset = 0; > + ve[0].Component0Control = VFCOMP_STORE_0; > + ve[0].Component1Control = VFCOMP_STORE_0; > + ve[0].Component2Control = VFCOMP_STORE_0; > + ve[0].Component3Control = VFCOMP_STORE_0; > + > + ve[1].VertexBufferIndex = 0; > + ve[1].Valid = true; > + ve[1].SourceElementFormat = ISL_FORMAT_R32G32_FLOAT; > + ve[1].SourceElementOffset = 0; > + ve[1].Component0Control = VFCOMP_STORE_SRC; > + ve[1].Component1Control = VFCOMP_STORE_SRC; > + ve[1].Component2Control = VFCOMP_STORE_0; > + ve[1].Component3Control = VFCOMP_STORE_1_FP; > + > + for (unsigned i = 0; i < num_varyings; ++i) { > + ve[i + 2].VertexBufferIndex = 1; > + ve[i + 2].Valid = true; > + ve[i + 2].SourceElementFormat = ISL_FORMAT_R32G32B32A32_FLOAT; > + ve[i + 2].SourceElementOffset = i * 4 * sizeof(float); > + ve[i + 2].Component0Control = VFCOMP_STORE_SRC; > + ve[i + 2].Component1Control = VFCOMP_STORE_SRC; > + ve[i + 2].Component2Control = VFCOMP_STORE_SRC; > + ve[i + 2].Component3Control = VFCOMP_STORE_SRC; > + } > + > + const unsigned num_dwords = > + 1 + GENX(VERTEX_ELEMENT_STATE_length) * num_elements; > + uint32_t *dw = blorp_emitn(batch, GENX(3DSTATE_VERTEX_ELEMENTS), > num_dwords); > + > + for (unsigned i = 0; i < num_elements; i++) { > + GENX(VERTEX_ELEMENT_STATE_pack)(&batch, dw, &ve[i]); > + dw += GENX(VERTEX_ELEMENT_STATE_length); > + } > + > +#if GEN_GEN >= 8 > + blorp_emit(batch, GENX(3DSTATE_VF_SGVS), sgvs); > + > + for (unsigned i = 0; i < num_elements; i++) { > + blorp_emit(batch, GENX(3DSTATE_VF_INSTANCING), vf) { > + vf.VertexElementIndex = i; > + vf.InstancingEnable = false; > + } > + } > + > + blorp_emit(batch, GENX(3DSTATE_VF_TOPOLOGY), topo) { > + topo.PrimitiveTopologyType = _3DPRIM_RECTLIST; > + } > +#endif > +} > + > +static void > +blorp_emit_sf_config(struct blorp_batch batch, > + const struct brw_blorp_params *params) > +{ > + const struct brw_blorp_prog_data *prog_data = params->wm_prog_data; > + > +#if GEN_GEN >= 8 > + > + blorp_emit(batch, GENX(3DSTATE_SF), sf); > + > + blorp_emit(batch, GENX(3DSTATE_RASTER), raster) { > + raster.CullMode = CULLMODE_NONE; > + } > + > + blorp_emit(batch, GENX(3DSTATE_SBE), sbe) { > + sbe.VertexURBEntryReadOffset = BRW_SF_URB_ENTRY_READ_OFFSET; > + sbe.NumberofSFOutputAttributes = prog_data->num_varying_inputs; > + sbe.VertexURBEntryReadLength = brw_blorp_get_urb_length(prog_data); > + sbe.ForceVertexURBEntryReadLength = true; > + sbe.ForceVertexURBEntryReadOffset = true; > + sbe.ConstantInterpolationEnable = prog_data->flat_inputs; > + > +#if GEN_GEN >= 9 > + for (unsigned i = 0; i < 32; i++) > + sbe.AttributeActiveComponentFormat[i] = ACF_XYZW; > +#endif > + } > + > +#elif GEN_GEN >= 7 > + > + blorp_emit(batch, GENX(3DSTATE_SF), sf) { > + sf.FrontFaceFillMode = FILL_MODE_SOLID; > + sf.BackFaceFillMode = FILL_MODE_SOLID; > + > + sf.MultisampleRasterizationMode = params->dst.surf.samples > 1 ? > + MSRASTMODE_ON_PATTERN : MSRASTMODE_OFF_PIXEL; > + > +#if GEN_GEN == 7 > + sf.DepthBufferSurfaceFormat = params->depth_format; > +#endif > + } > + > + blorp_emit(batch, GENX(3DSTATE_SBE), sbe) { > + sbe.VertexURBEntryReadOffset = BRW_SF_URB_ENTRY_READ_OFFSET; > + if (prog_data) { > + sbe.NumberofSFOutputAttributes = prog_data->num_varying_inputs; > + sbe.VertexURBEntryReadLength = brw_blorp_get_urb_length(prog_data); > + sbe.ConstantInterpolationEnable = prog_data->flat_inputs; > + } else { > + sbe.NumberofSFOutputAttributes = 0; > + sbe.VertexURBEntryReadLength = 1; > + } > + } > + > +#else /* GEN_GEN <= 6 */ > + > + blorp_emit(batch, GENX(3DSTATE_SF), sf) { > + sf.FrontFaceFillMode = FILL_MODE_SOLID; > + sf.BackFaceFillMode = FILL_MODE_SOLID; > + > + sf.MultisampleRasterizationMode = params->dst.surf.samples > 1 ? > + MSRASTMODE_ON_PATTERN : MSRASTMODE_OFF_PIXEL; > + > + sf.VertexURBEntryReadOffset = BRW_SF_URB_ENTRY_READ_OFFSET; > + if (prog_data) { > + sf.NumberofSFOutputAttributes = prog_data->num_varying_inputs; > + sf.VertexURBEntryReadLength = brw_blorp_get_urb_length(prog_data); > + sf.ConstantInterpolationEnable = prog_data->flat_inputs; > + } else { > + sf.NumberofSFOutputAttributes = 0; > + sf.VertexURBEntryReadLength = 1; > + } > + } > + > +#endif /* GEN_GEN */ > +} > + > +static void > +blorp_emit_ps_config(struct blorp_batch batch, > + const struct brw_blorp_params *params) > +{ > + const struct brw_blorp_prog_data *prog_data = params->wm_prog_data; > + > +#if GEN_GEN >= 8 > + > + blorp_emit(batch, GENX(3DSTATE_WM), wm); > + > + blorp_emit(batch, GENX(3DSTATE_PS), ps) { > + if (params->src.addr.buffer) { > + ps.SamplerCount = 1; /* Up to 4 samplers */ > + ps.BindingTableEntryCount = 2; > + } else { > + ps.BindingTableEntryCount = 1; > + } > + > + ps.DispatchGRFStartRegisterForConstantSetupData0 = > + prog_data->first_curbe_grf_0; > + ps.DispatchGRFStartRegisterForConstantSetupData2 = > + prog_data->first_curbe_grf_2; > + > + ps._8PixelDispatchEnable = prog_data->dispatch_8; > + ps._16PixelDispatchEnable = prog_data->dispatch_16; > + > + ps.KernelStartPointer0 = params->wm_prog_kernel; > + ps.KernelStartPointer2 = > + params->wm_prog_kernel + prog_data->ksp_offset_2; > + > + /* 3DSTATE_PS expects the number of threads per PSD, which is always > 64; > + * it implicitly scales for different GT levels (which have some # of > + * PSDs). > + * > + * In Gen8 the format is U8-2 whereas in Gen9 it is U8-1. > + */ > + if (GEN_GEN >= 9) > + ps.MaximumNumberofThreadsPerPSD = 64 - 1; > + else > + ps.MaximumNumberofThreadsPerPSD = 64 - 2; > + > + switch (params->fast_clear_op) { > +#if GEN_GEN >= 9 > + case (1 << 6): /* GEN7_PS_RENDER_TARGET_RESOLVE_ENABLE */ > + ps.RenderTargetResolveType = RESOLVE_PARTIAL; > + break; > + case (3 << 6): /* GEN9_PS_RENDER_TARGET_RESOLVE_FULL */ > + ps.RenderTargetResolveType = RESOLVE_FULL; > + break; > +#else > + case (1 << 6): /* GEN7_PS_RENDER_TARGET_RESOLVE_ENABLE */ > + ps.RenderTargetResolveEnable = true; > + break; > +#endif > + case (1 << 8): /* GEN7_PS_RENDER_TARGET_FAST_CLEAR_ENABLE */ > + ps.RenderTargetFastClearEnable = true; > + break; > + } > + } > + > + blorp_emit(batch, GENX(3DSTATE_PS_EXTRA), psx) { > + psx.PixelShaderValid = true; > + > + if (params->src.addr.buffer) > + psx.PixelShaderKillsPixel = true; > + > + psx.AttributeEnable = prog_data->num_varying_inputs > 0; > + > + if (prog_data && prog_data->persample_msaa_dispatch) > + psx.PixelShaderIsPerSample = true; > + } > + > +#elif GEN_GEN >= 7 > + > + blorp_emit(batch, GENX(3DSTATE_WM), wm) { > + switch (params->hiz_op) { > + case GEN6_HIZ_OP_DEPTH_CLEAR: > + wm.DepthBufferClear = true; > + break; > + case GEN6_HIZ_OP_DEPTH_RESOLVE: > + wm.DepthBufferResolveEnable = true; > + break; > + case GEN6_HIZ_OP_HIZ_RESOLVE: > + wm.HierarchicalDepthBufferResolveEnable = true; > + break; > + case GEN6_HIZ_OP_NONE: > + break; > + default: > + unreachable("not reached"); > + } > + > + if (prog_data) > + wm.ThreadDispatchEnable = true; > + > + if (params->src.addr.buffer) > + wm.PixelShaderKillPixel = true; > + > + if (params->dst.surf.samples > 1) { > + wm.MultisampleRasterizationMode = MSRASTMODE_ON_PATTERN; > + wm.MultisampleDispatchMode = > + (prog_data && prog_data->persample_msaa_dispatch) ? > + MSDISPMODE_PERSAMPLE : MSDISPMODE_PERPIXEL; > + } else { > + wm.MultisampleRasterizationMode = MSRASTMODE_OFF_PIXEL; > + wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE; > + } > + } > + > + blorp_emit(batch, GENX(3DSTATE_PS), ps) { > + ps.MaximumNumberofThreads = batch.blorp->isl_dev->info->max_wm_threads > - 1; > + > +#if GEN_IS_HASWELL > + ps.SampleMask = 1; > +#endif > + > + if (prog_data) { > + ps.DispatchGRFStartRegisterforConstantSetupData0 = > + prog_data->first_curbe_grf_0; > + ps.DispatchGRFStartRegisterforConstantSetupData2 = > + prog_data->first_curbe_grf_2; > + > + ps.KernelStartPointer0 = params->wm_prog_kernel; > + ps.KernelStartPointer2 = > + params->wm_prog_kernel + prog_data->ksp_offset_2; > + > + ps._8PixelDispatchEnable = prog_data->dispatch_8; > + ps._16PixelDispatchEnable = prog_data->dispatch_16; > + > + ps.AttributeEnable = prog_data->num_varying_inputs > 0; > + } else { > + /* Gen7 hardware gets angry if we don't enable at least one dispatch > + * mode, so just enable 16-pixel dispatch if we don't have a > program. > + */ > + ps._16PixelDispatchEnable = true; > + } > + > + if (params->src.addr.buffer) > + ps.SamplerCount = 1; /* Up to 4 samplers */ > + > + switch (params->fast_clear_op) { > + case (1 << 6): /* GEN7_PS_RENDER_TARGET_RESOLVE_ENABLE */ > + ps.RenderTargetResolveEnable = true; > + break; > + case (1 << 8): /* GEN7_PS_RENDER_TARGET_FAST_CLEAR_ENABLE */ > + ps.RenderTargetFastClearEnable = true; > + break; > + } > + } > + > +#else /* GEN_GEN <= 6 */ > + > + blorp_emit(batch, GENX(3DSTATE_WM), wm) { > + wm.MaximumNumberofThreads = batch.blorp->isl_dev->info->max_wm_threads > - 1; > + > + switch (params->hiz_op) { > + case GEN6_HIZ_OP_DEPTH_CLEAR: > + wm.DepthBufferClear = true; > + break; > + case GEN6_HIZ_OP_DEPTH_RESOLVE: > + wm.DepthBufferResolveEnable = true; > + break; > + case GEN6_HIZ_OP_HIZ_RESOLVE: > + wm.HierarchicalDepthBufferResolveEnable = true; > + break; > + case GEN6_HIZ_OP_NONE: > + break; > + default: > + unreachable("not reached"); > + } > + > + if (prog_data) { > + wm.ThreadDispatchEnable = true; > + > + wm.DispatchGRFStartRegisterforConstantSetupData0 = > + prog_data->first_curbe_grf_0; > + wm.DispatchGRFStartRegisterforConstantSetupData2 = > + prog_data->first_curbe_grf_2; > + > + wm.KernelStartPointer0 = params->wm_prog_kernel; > + wm.KernelStartPointer2 = > + params->wm_prog_kernel + prog_data->ksp_offset_2; > + > + wm._8PixelDispatchEnable = prog_data->dispatch_8; > + wm._16PixelDispatchEnable = prog_data->dispatch_16; > + > + wm.NumberofSFOutputAttributes = prog_data->num_varying_inputs; > + } > + > + if (params->src.addr.buffer) { > + wm.SamplerCount = 1; /* Up to 4 samplers */ > + wm.PixelShaderKillPixel = true; /* TODO: temporarily smash on */ > + } > + > + if (params->dst.surf.samples > 1) { > + wm.MultisampleRasterizationMode = MSRASTMODE_ON_PATTERN; > + wm.MultisampleDispatchMode = > + (prog_data && prog_data->persample_msaa_dispatch) ? > + MSDISPMODE_PERSAMPLE : MSDISPMODE_PERPIXEL; > + } else { > + wm.MultisampleRasterizationMode = MSRASTMODE_OFF_PIXEL; > + wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE; > + } > + } > + > +#endif /* GEN_GEN */ > +} > + > + > +static void > +blorp_emit_depth_stencil_config(struct blorp_batch batch, > + const struct brw_blorp_params *params) > +{ > +#if GEN_GEN >= 7 > + const uint32_t mocs = 1; /* GEN7_MOCS_L3 */ > +#else > + const uint32_t mocs = 0; > +#endif > + > + blorp_emit(batch, GENX(3DSTATE_DEPTH_BUFFER), db) { > + switch (params->depth.surf.dim) { > + case ISL_SURF_DIM_1D: > + db.SurfaceType = SURFTYPE_1D; > + break; > + case ISL_SURF_DIM_2D: > + db.SurfaceType = SURFTYPE_2D; > + break; > + case ISL_SURF_DIM_3D: > + db.SurfaceType = SURFTYPE_3D; > + break; > + } > + > + db.SurfaceFormat = params->depth_format; > + > +#if GEN_GEN >= 7 > + db.DepthWriteEnable = true; > +#endif > + > +#if GEN_GEN <= 6 > + db.TiledSurface = true; > + db.TileWalk = TILEWALK_YMAJOR; > + db.MIPMapLayoutMode = MIPLAYOUT_BELOW; > + db.SeparateStencilBufferEnable = true; > +#endif > + > + db.HierarchicalDepthBufferEnable = true; > + > + db.Width = params->depth.surf.logical_level0_px.width - 1; > + db.Height = params->depth.surf.logical_level0_px.height - 1; > + db.RenderTargetViewExtent = db.Depth = > + MAX2(params->depth.surf.logical_level0_px.depth, > + params->depth.surf.logical_level0_px.array_len) - 1; > + > + db.LOD = params->depth.view.base_level; > + db.MinimumArrayElement = params->depth.view.base_array_layer; > + > + db.SurfacePitch = params->depth.surf.row_pitch - 1; > + db.SurfaceBaseAddress = params->depth.addr; > + db.DepthBufferMOCS = mocs; > + } > + > + blorp_emit(batch, GENX(3DSTATE_HIER_DEPTH_BUFFER), hiz) { > + hiz.SurfacePitch = params->depth.aux_surf.row_pitch - 1; > + hiz.SurfaceBaseAddress = params->depth.aux_addr; > + hiz.HierarchicalDepthBufferMOCS = mocs; > + } > + > + blorp_emit(batch, GENX(3DSTATE_STENCIL_BUFFER), sb); > +} > + > +static uint32_t > +blorp_emit_blend_state(struct blorp_batch batch, > + const struct brw_blorp_params *params) > +{ > + struct GENX(BLEND_STATE) blend; > + memset(&blend, 0, sizeof(blend)); > + > + for (unsigned i = 0; i < params->num_draw_buffers; ++i) { > + blend.Entry[i].PreBlendColorClampEnable = true; > + blend.Entry[i].PostBlendColorClampEnable = true; > + blend.Entry[i].ColorClampRange = COLORCLAMP_RTFORMAT; > + > + blend.Entry[i].WriteDisableRed = params->color_write_disable[0]; > + blend.Entry[i].WriteDisableGreen = params->color_write_disable[1]; > + blend.Entry[i].WriteDisableBlue = params->color_write_disable[2]; > + blend.Entry[i].WriteDisableAlpha = params->color_write_disable[3]; > + } > + > + uint32_t offset; > + void *state = blorp_alloc_dynamic_state(batch.blorp, > + AUB_TRACE_BLEND_STATE, > + GENX(BLEND_STATE_length) * 4, > + 64, &offset); > + GENX(BLEND_STATE_pack)(NULL, state, &blend); > + > +#if GEN_GEN >= 7 > + blorp_emit(batch, GENX(3DSTATE_BLEND_STATE_POINTERS), sp) { > + sp.BlendStatePointer = offset; > +#if GEN_GEN >= 8 > + sp.BlendStatePointerValid = true; > +#endif > + } > +#endif > + > +#if GEN_GEN >= 8 > + blorp_emit(batch, GENX(3DSTATE_PS_BLEND), ps_blend) { > + ps_blend.HasWriteableRT = true; > + } > +#endif > + > + return offset; > +} > + > +static uint32_t > +blorp_emit_color_calc_state(struct blorp_batch batch, > + const struct brw_blorp_params *params) > +{ > + uint32_t offset; > + void *state = blorp_alloc_dynamic_state(batch.blorp, > + AUB_TRACE_CC_STATE, > + GENX(COLOR_CALC_STATE_length) * 4, > + 64, &offset); > + memset(state, 0, GENX(COLOR_CALC_STATE_length) * 4); > + > +#if GEN_GEN >= 7 > + blorp_emit(batch, GENX(3DSTATE_CC_STATE_POINTERS), sp) { > + sp.ColorCalcStatePointer = offset; > +#if GEN_GEN >= 8 > + sp.ColorCalcStatePointerValid = true; > +#endif > + } > +#endif > + > + return offset; > +} > + > +static uint32_t > +blorp_emit_depth_stencil_state(struct blorp_batch batch, > + const struct brw_blorp_params *params) > +{ > +#if GEN_GEN >= 8 > + > + /* On gen8+, DEPTH_STENCIL state is simply an instruction */ > + blorp_emit(batch, GENX(3DSTATE_WM_DEPTH_STENCIL), ds); > + return 0; > + > +#else /* GEN_GEN <= 7 */ > + > + /* See the following sections of the Sandy Bridge PRM, Volume 1, Part2: > + * - 7.5.3.1 Depth Buffer Clear > + * - 7.5.3.2 Depth Buffer Resolve > + * - 7.5.3.3 Hierarchical Depth Buffer Resolve > + */ > + struct GENX(DEPTH_STENCIL_STATE) ds = { > + .DepthBufferWriteEnable = true, > + }; > + > + if (params->hiz_op == GEN6_HIZ_OP_DEPTH_RESOLVE) { > + ds.DepthTestEnable = true; > + ds.DepthTestFunction = COMPAREFUNCTION_NEVER; > + } > + > + uint32_t offset; > + void *state = blorp_alloc_dynamic_state(batch.blorp, > + AUB_TRACE_DEPTH_STENCIL_STATE, > + GENX(DEPTH_STENCIL_STATE_length) > * 4, > + 64, &offset); > + GENX(DEPTH_STENCIL_STATE_pack)(NULL, state, &ds); > + > +#if GEN_GEN >= 7 > + blorp_emit(batch, GENX(3DSTATE_DEPTH_STENCIL_STATE_POINTERS), sp) { > + sp.PointertoDEPTH_STENCIL_STATE = offset; > + } > +#endif > + > + return offset; > + > +#endif /* GEN_GEN */ > +} > + > +struct surface_state_info { > + unsigned num_dwords; > + unsigned ss_align; /* Required alignment of RENDER_SURFACE_STATE in bytes > */ > + unsigned reloc_dw; > + unsigned aux_reloc_dw; > +}; > + > +static const struct surface_state_info surface_state_infos[] = { > + [6] = {6, 32, 1, 0}, > + [7] = {8, 32, 1, 6}, > + [8] = {13, 64, 8, 10}, > + [9] = {16, 64, 8, 10}, > +}; > + > +static void > +blorp_emit_surface_state(struct blorp_context *blorp, > + const struct brw_blorp_surface_info *surface, > + uint32_t *state, uint32_t state_offset, > + bool is_render_target) > +{ > + const struct surface_state_info ss_info = surface_state_infos[GEN_GEN]; > + > + struct isl_surf surf = surface->surf; > + > + if (surf.dim == ISL_SURF_DIM_1D && > + surf.dim_layout == ISL_DIM_LAYOUT_GEN4_2D) { > + assert(surf.logical_level0_px.height == 1); > + surf.dim = ISL_SURF_DIM_2D; > + } > + > + /* Blorp doesn't support HiZ in any of the blit or slow-clear paths */ > + enum isl_aux_usage aux_usage = surface->aux_usage; > + if (aux_usage == ISL_AUX_USAGE_HIZ) > + aux_usage = ISL_AUX_USAGE_NONE; > + > + const uint32_t mocs = is_render_target ? blorp->mocs.rb : blorp->mocs.tex; > + > + isl_surf_fill_state(blorp->isl_dev, state, > + .surf = &surf, .view = &surface->view, > + .aux_surf = &surface->aux_surf, .aux_usage = > aux_usage, > + .mocs = mocs, .clear_color = surface->clear_color, > + .x_offset_sa = surface->tile_x_sa, > + .y_offset_sa = surface->tile_y_sa); > + > + blorp_surface_reloc(blorp, state_offset + ss_info.reloc_dw * 4, > + surface->addr, 0); > + > + if (aux_usage != ISL_AUX_USAGE_NONE) { > + /* On gen7 and prior, the bottom 12 bits of the MCS base address are > + * used to store other information. This should be ok, however, > because > + * surface buffer addresses are always 4K page alinged. > + */ > + assert((surface->aux_addr.offset & 0xfff) == 0); > + blorp_surface_reloc(blorp, state_offset + ss_info.aux_reloc_dw * 4, > + surface->aux_addr, state[ss_info.aux_reloc_dw]); > + } > +} > + > +static void > +blorp_emit_surface_states(struct blorp_batch batch, > + const struct brw_blorp_params *params) > +{ > + uint32_t bind_offset, *bind_map; > + void *surface_maps[2]; > + > + const unsigned ss_size = GENX(RENDER_SURFACE_STATE_length) * 4; > + const unsigned ss_align = GENX(RENDER_SURFACE_STATE_length) > 8 ? 64 : 32; > + > + unsigned num_surfaces = 1 + (params->src.addr.buffer != NULL); > + blorp_alloc_binding_table(batch.blorp, num_surfaces, ss_size, ss_align, > + &bind_offset, &bind_map, surface_maps); > + > + blorp_emit_surface_state(batch.blorp, ¶ms->dst, > + surface_maps[BLORP_RENDERBUFFER_BT_INDEX], > + bind_map[BLORP_RENDERBUFFER_BT_INDEX], true); > + if (params->src.addr.buffer) { > + blorp_emit_surface_state(batch.blorp, ¶ms->src, > + surface_maps[BLORP_TEXTURE_BT_INDEX], > + bind_map[BLORP_TEXTURE_BT_INDEX], false); > + } > + > +#if GEN_GEN >= 7 > + blorp_emit(batch, GENX(3DSTATE_BINDING_TABLE_POINTERS_PS), bt) { > + bt.PointertoPSBindingTable = bind_offset; > + } > +#else > + blorp_emit(batch, GENX(3DSTATE_BINDING_TABLE_POINTERS), bt) { > + bt.PSBindingTableChange = true; > + bt.PointertoPSBindingTable = bind_offset; > + } > +#endif > +} > + > +static void > +blorp_emit_sampler_state(struct blorp_batch batch, > + const struct brw_blorp_params *params) > +{ > + struct GENX(SAMPLER_STATE) sampler = { > + .MipModeFilter = MIPFILTER_NONE, > + .MagModeFilter = MAPFILTER_LINEAR, > + .MinModeFilter = MAPFILTER_LINEAR, > + .MinLOD = 0, > + .MaxLOD = 0, > + .TCXAddressControlMode = TCM_CLAMP, > + .TCYAddressControlMode = TCM_CLAMP, > + .TCZAddressControlMode = TCM_CLAMP, > + .MaximumAnisotropy = RATIO21, > + .RAddressMinFilterRoundingEnable = true, > + .RAddressMagFilterRoundingEnable = true, > + .VAddressMinFilterRoundingEnable = true, > + .VAddressMagFilterRoundingEnable = true, > + .UAddressMinFilterRoundingEnable = true, > + .UAddressMagFilterRoundingEnable = true, > + .NonnormalizedCoordinateEnable = true, > + }; > + > + uint32_t offset; > + void *state = blorp_alloc_dynamic_state(batch.blorp, > + AUB_TRACE_SAMPLER_STATE, > + GENX(SAMPLER_STATE_length) * 4, > + 32, &offset); > + GENX(SAMPLER_STATE_pack)(NULL, state, &sampler); > + > +#if GEN_GEN >= 7 > + blorp_emit(batch, GENX(3DSTATE_SAMPLER_STATE_POINTERS_PS), ssp) { > + ssp.PointertoPSSamplerState = offset; > + } > +#else > + blorp_emit(batch, GENX(3DSTATE_SAMPLER_STATE_POINTERS), ssp) { > + ssp.VSSamplerStateChange = true; > + ssp.GSSamplerStateChange = true; > + ssp.PSSamplerStateChange = true; > + ssp.PointertoPSSamplerState = offset; > + } > +#endif > +} > + > +/* 3DSTATE_VIEWPORT_STATE_POINTERS */ > +static void > +blorp_emit_viewport_state(struct blorp_batch batch, > + const struct brw_blorp_params *params) > +{ > + uint32_t cc_vp_offset; > + > + void *state = blorp_alloc_dynamic_state(batch.blorp, > + AUB_TRACE_CC_VP_STATE, > + GENX(CC_VIEWPORT_length) * 4, 32, > + &cc_vp_offset); > + > + GENX(CC_VIEWPORT_pack)(&batch, state, > + &(struct GENX(CC_VIEWPORT)) { > + .MinimumDepth = 0.0, > + .MaximumDepth = 1.0, > + }); > + > +#if GEN_GEN >= 7 > + blorp_emit(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), vsp) { > + vsp.CCViewportPointer = cc_vp_offset; > + } > +#else > + blorp_emit(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS), vsp) { > + vsp.CCViewportStateChange = true; > + vsp.PointertoCC_VIEWPORT = cc_vp_offset; > + } > +#endif > +} > + > + > +/** > + * \brief Execute a blit or render pass operation. > + * > + * To execute the operation, this function manually constructs and emits a > + * batch to draw a rectangle primitive. The batchbuffer is flushed before > + * constructing and after emitting the batch. > + * > + * This function alters no GL state. > + */ > +static void > +blorp_exec(struct blorp_context *blorp, void *batch_data, > + const struct brw_blorp_params *params) > +{ > + struct blorp_batch batch = { > + .blorp = blorp, > + .batch = batch_data, > + }; > + > + uint32_t blend_state_offset = 0; > + uint32_t color_calc_state_offset = 0; > + uint32_t depth_stencil_state_offset; > + > + blorp_emit_vertex_buffers(batch, params); > + blorp_emit_vertex_elements(batch, params); > + > + emit_urb_config(batch, params); > + > + if (params->wm_prog_data) { > + blend_state_offset = blorp_emit_blend_state(batch, params); > + color_calc_state_offset = blorp_emit_color_calc_state(batch, params); > + } > + depth_stencil_state_offset = blorp_emit_depth_stencil_state(batch, > params); > + > +#if GEN_GEN <= 6 > + /* The dynamic state emit helpers emit their own STATE_POINTERS packets on > + * gen7+. However, on gen6 and earlier, they're all lumpped together in > + * one CC_STATE_POINTERS packet so we have to emit that here. > + */ > + blorp_emit(batch, GENX(3DSTATE_CC_STATE_POINTERS), cc) { > + cc.BLEND_STATEChange = true; > + cc.COLOR_CALC_STATEChange = true; > + cc.DEPTH_STENCIL_STATEChange = true; > + cc.PointertoBLEND_STATE = blend_state_offset; > + cc.PointertoCOLOR_CALC_STATE = color_calc_state_offset; > + cc.PointertoDEPTH_STENCIL_STATE = depth_stencil_state_offset; > + } > +#else > + (void)blend_state_offset; > + (void)color_calc_state_offset; > + (void)depth_stencil_state_offset; > +#endif > + > + blorp_emit(batch, GENX(3DSTATE_CONSTANT_VS), vs); > +#if GEN_GEN >= 7 > + blorp_emit(batch, GENX(3DSTATE_CONSTANT_HS), hs); > + blorp_emit(batch, GENX(3DSTATE_CONSTANT_DS), DS); > +#endif > + blorp_emit(batch, GENX(3DSTATE_CONSTANT_GS), gs); > + blorp_emit(batch, GENX(3DSTATE_CONSTANT_PS), ps); > + > + if (params->wm_prog_data) > + blorp_emit_surface_states(batch, params); > + > + if (params->src.addr.buffer) > + blorp_emit_sampler_state(batch, params); > + > + blorp_emit_3dstate_multisample(batch.blorp, batch.batch, > + params->dst.surf.samples); > + > + blorp_emit(batch, GENX(3DSTATE_SAMPLE_MASK), mask) { > + mask.SampleMask = (1 << params->dst.surf.samples) - 1; > + } > + > + blorp_emit(batch, GENX(3DSTATE_VS), vs); > +#if GEN_GEN >= 7 > + blorp_emit(batch, GENX(3DSTATE_HS), hs); > + blorp_emit(batch, GENX(3DSTATE_TE), te); > + blorp_emit(batch, GENX(3DSTATE_DS), DS); > + blorp_emit(batch, GENX(3DSTATE_STREAMOUT), so); > +#endif > + blorp_emit(batch, GENX(3DSTATE_GS), gs); > + > + blorp_emit(batch, GENX(3DSTATE_CLIP), clip) { > + clip.PerspectiveDivideDisable = true; > + } > + > + blorp_emit_sf_config(batch, params); > + blorp_emit_ps_config(batch, params); > + > + blorp_emit_viewport_state(batch, params); > + > + if (params->depth.addr.buffer) { > + blorp_emit_depth_stencil_config(batch, params); > + } else { > + blorp_emit(batch, GENX(3DSTATE_DEPTH_BUFFER), db) { > + db.SurfaceType = SURFTYPE_NULL; > + db.SurfaceFormat = D32_FLOAT; > + } > + blorp_emit(batch, GENX(3DSTATE_HIER_DEPTH_BUFFER), hiz); > + blorp_emit(batch, GENX(3DSTATE_STENCIL_BUFFER), sb); > + } > + > + blorp_emit(batch, GENX(3DSTATE_CLEAR_PARAMS), clear) { > + clear.DepthClearValueValid = true; > + clear.DepthClearValue = params->depth.clear_color.u32[0]; > + } > + > + blorp_emit(batch, GENX(3DSTATE_DRAWING_RECTANGLE), rect) { > + rect.ClippedDrawingRectangleXMax = MAX2(params->x1, params->x0) - 1; > + rect.ClippedDrawingRectangleYMax = MAX2(params->y1, params->y0) - 1; > + } > + > + blorp_emit(batch, GENX(3DPRIMITIVE), prim) { > + prim.VertexAccessType = SEQUENTIAL; > + prim.PrimitiveTopologyType = _3DPRIM_RECTLIST; > + prim.VertexCountPerInstance = 3; > + prim.InstanceCount = params->num_layers; > + } > +} > -- > 2.5.0.400.gff86faf > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev