Hi Dave, The change in si_shader_io_get_unique_index can be dropped. The function is only used for shaders before GS.
This looks good, but I've had a different plan for this feature: I'd like the states to be converted into 2 atoms: one r600_atom for all 16 viewports and one r600_atom for all 16 scissors. Each atom should have a bitmask saying which "slots" are dirty (the same idea as resource slots). The "emit" functions should only emit dirty viewports/scissors. Also, the "emit" functions shouldn't emit viewports/scissors with a non-zero index if the viewport index isn't written by the hardware VS stage (si_get_vs_info(sctx)->...). This should keep the same level of effectiveness as before. When a shader that writes the viewport index is bound *and* there are any dirty viewports or scissors, that's the right time to mark the atoms as dirty again, so that the dirty viewports/scissors with a non-zero index are finally emitted. Marek On Thu, Jun 25, 2015 at 6:38 AM, Dave Airlie <airl...@gmail.com> wrote: > From: Dave Airlie <airl...@redhat.com> > > This isn't pretty and I'd suggest it the pm4 interface builder > could be tweaked to do this more efficiently, but I'd need > guidance on how that would look. > > This seems to pass the few piglit tests I threw at it. > > v2: handle passing layer/viewport index to fragment shader. > fix crash in blit changes, > add support to io_get_unique_index for layer/viewport index > update docs. 
> > Signed-off-by: Dave Airlie <airl...@redhat.com> > --- > docs/GL3.txt | 4 +- > docs/relnotes/10.7.0.html | 3 ++ > src/gallium/drivers/radeonsi/si_blit.c | 8 +-- > src/gallium/drivers/radeonsi/si_pipe.c | 2 +- > src/gallium/drivers/radeonsi/si_shader.c | 26 +++++++--- > src/gallium/drivers/radeonsi/si_state.c | 66 > +++++++++++++++---------- > src/gallium/drivers/radeonsi/si_state.h | 4 +- > src/gallium/drivers/radeonsi/si_state_shaders.c | 2 - > 8 files changed, 73 insertions(+), 42 deletions(-) > > diff --git a/docs/GL3.txt b/docs/GL3.txt > index 220bcc8..df913bd 100644 > --- a/docs/GL3.txt > +++ b/docs/GL3.txt > @@ -128,7 +128,7 @@ GL 4.1, GLSL 4.10: > GL_ARB_separate_shader_objects DONE (all drivers) > GL_ARB_shader_precision started (Micah) > GL_ARB_vertex_attrib_64bit DONE (nvc0, softpipe) > - GL_ARB_viewport_array DONE (i965, nv50, > nvc0, r600, llvmpipe) > + GL_ARB_viewport_array DONE (i965, nv50, > nvc0, r600, radeonsi, llvmpipe) > > > GL 4.2, GLSL 4.20: > @@ -156,7 +156,7 @@ GL 4.3, GLSL 4.30: > GL_ARB_copy_image DONE (i965) (gallium > - in progress, VMware) > GL_KHR_debug DONE (all drivers) > GL_ARB_explicit_uniform_location DONE (all drivers > that support GLSL) > - GL_ARB_fragment_layer_viewport DONE (nv50, nvc0, > r600, llvmpipe) > + GL_ARB_fragment_layer_viewport DONE (nv50, nvc0, > r600, radeonsi, llvmpipe) > GL_ARB_framebuffer_no_attachments DONE (i965) > GL_ARB_internalformat_query2 not started > GL_ARB_invalidate_subdata DONE (all drivers) > diff --git a/docs/relnotes/10.7.0.html b/docs/relnotes/10.7.0.html > index e089889..fcc5081 100644 > --- a/docs/relnotes/10.7.0.html > +++ b/docs/relnotes/10.7.0.html > @@ -44,8 +44,11 @@ Note: some of the new features are only available with > certain drivers. 
> </p> > > <ul> > +<li>GL_AMD_vertex_shader_viewport_index on radeonsi</li> > <li>GL_ARB_framebuffer_no_attachments on i965</li> > <li>GL_ARB_shader_stencil_export on llvmpipe</li> > +<li>GL_ARB_viewport_array on radeonsi</li> > +<li>GL_ARB_fragment_layer_viewport on radeonsi</li> > </ul> > > <h2>Bug fixes</h2> > diff --git a/src/gallium/drivers/radeonsi/si_blit.c > b/src/gallium/drivers/radeonsi/si_blit.c > index 1f2c408..6c7b383 100644 > --- a/src/gallium/drivers/radeonsi/si_blit.c > +++ b/src/gallium/drivers/radeonsi/si_blit.c > @@ -63,11 +63,11 @@ static void si_blitter_begin(struct pipe_context *ctx, > enum si_blitter_op op) > util_blitter_save_sample_mask(sctx->blitter, > > sctx->queued.named.sample_mask->sample_mask); > } > - if (sctx->queued.named.viewport) { > - util_blitter_save_viewport(sctx->blitter, > &sctx->queued.named.viewport->viewport); > + if (sctx->queued.named.viewport[0]) { > + util_blitter_save_viewport(sctx->blitter, > &sctx->queued.named.viewport[0]->viewport); > } > - if (sctx->queued.named.scissor) { > - util_blitter_save_scissor(sctx->blitter, > &sctx->queued.named.scissor->scissor); > + if (sctx->queued.named.scissor[0]) { > + util_blitter_save_scissor(sctx->blitter, > &sctx->queued.named.scissor[0]->scissor); > } > util_blitter_save_vertex_buffer_slot(sctx->blitter, > sctx->vertex_buffer); > util_blitter_save_so_targets(sctx->blitter, > sctx->b.streamout.num_targets, > diff --git a/src/gallium/drivers/radeonsi/si_pipe.c > b/src/gallium/drivers/radeonsi/si_pipe.c > index 53ae71a..480a301 100644 > --- a/src/gallium/drivers/radeonsi/si_pipe.c > +++ b/src/gallium/drivers/radeonsi/si_pipe.c > @@ -335,7 +335,7 @@ static int si_get_param(struct pipe_screen* pscreen, enum > pipe_cap param) > return 8; > > case PIPE_CAP_MAX_VIEWPORTS: > - return 1; > + return 16; > > /* Timer queries, present when the clock frequency is non zero. 
*/ > case PIPE_CAP_QUERY_TIMESTAMP: > diff --git a/src/gallium/drivers/radeonsi/si_shader.c > b/src/gallium/drivers/radeonsi/si_shader.c > index 47e5f96..87608a1 100644 > --- a/src/gallium/drivers/radeonsi/si_shader.c > +++ b/src/gallium/drivers/radeonsi/si_shader.c > @@ -125,12 +125,16 @@ unsigned si_shader_io_get_unique_index(unsigned > semantic_name, unsigned index) > return 0; > case TGSI_SEMANTIC_PSIZE: > return 1; > + case TGSI_SEMANTIC_LAYER: > + return 2; > + case TGSI_SEMANTIC_VIEWPORT_INDEX: > + return 3; > case TGSI_SEMANTIC_CLIPDIST: > assert(index <= 1); > - return 2 + index; > - case TGSI_SEMANTIC_GENERIC: > - assert(index <= 63-4); > return 4 + index; > + case TGSI_SEMANTIC_GENERIC: > + assert(index <= 63-6); > + return 6 + index; > > default: > assert(0); > @@ -1128,7 +1132,7 @@ static void si_llvm_export_vs(struct > lp_build_tgsi_context *bld_base, > > &si_shader_ctx->radeon_bld.soa.bld_base.uint_bld; > LLVMValueRef args[9]; > LLVMValueRef pos_args[4][9] = { { 0 } }; > - LLVMValueRef psize_value = NULL, edgeflag_value = NULL, layer_value = > NULL; > + LLVMValueRef psize_value = NULL, edgeflag_value = NULL, layer_value = > NULL, viewport_index_value = NULL; > unsigned semantic_name, semantic_index; > unsigned target; > unsigned param_count = 0; > @@ -1154,7 +1158,12 @@ handle_semantic: > continue; > case TGSI_SEMANTIC_LAYER: > layer_value = outputs[i].values[0]; > - continue; > + semantic_name = TGSI_SEMANTIC_GENERIC; > + goto handle_semantic; > + case TGSI_SEMANTIC_VIEWPORT_INDEX: > + viewport_index_value = outputs[i].values[0]; > + semantic_name = TGSI_SEMANTIC_GENERIC; > + goto handle_semantic; > case TGSI_SEMANTIC_POSITION: > target = V_008DFC_SQ_EXP_POS; > break; > @@ -1220,11 +1229,13 @@ handle_semantic: > /* Write the misc vector (point size, edgeflag, layer, viewport). 
*/ > if (shader->selector->info.writes_psize || > shader->selector->info.writes_edgeflag || > + shader->selector->info.writes_viewport_index || > shader->selector->info.writes_layer) { > pos_args[1][0] = lp_build_const_int32(base->gallivm, /* > writemask */ > > shader->selector->info.writes_psize | > > (shader->selector->info.writes_edgeflag << 1) | > - > (shader->selector->info.writes_layer << 2)); > + > (shader->selector->info.writes_layer << 2) | > + > (shader->selector->info.writes_viewport_index << 3)); > pos_args[1][1] = uint->zero; /* EXEC mask */ > pos_args[1][2] = uint->zero; /* last export? */ > pos_args[1][3] = lp_build_const_int32(base->gallivm, > V_008DFC_SQ_EXP_POS + 1); > @@ -1255,6 +1266,9 @@ handle_semantic: > > if (shader->selector->info.writes_layer) > pos_args[1][7] = layer_value; > + > + if (shader->selector->info.writes_viewport_index) > + pos_args[1][8] = viewport_index_value; > } > > for (i = 0; i < 4; i++) > diff --git a/src/gallium/drivers/radeonsi/si_state.c > b/src/gallium/drivers/radeonsi/si_state.c > index 6c18836..752467b 100644 > --- a/src/gallium/drivers/radeonsi/si_state.c > +++ b/src/gallium/drivers/radeonsi/si_state.c > @@ -489,11 +489,13 @@ static void si_emit_clip_regs(struct si_context *sctx, > struct r600_atom *atom) > S_02881C_USE_VTX_POINT_SIZE(info->writes_psize) | > S_02881C_USE_VTX_EDGE_FLAG(info->writes_edgeflag) | > S_02881C_USE_VTX_RENDER_TARGET_INDX(info->writes_layer) | > + S_02881C_USE_VTX_VIEWPORT_INDX(info->writes_viewport_index) | > S_02881C_VS_OUT_CCDIST0_VEC_ENA((clipdist_mask & 0x0F) != 0) | > S_02881C_VS_OUT_CCDIST1_VEC_ENA((clipdist_mask & 0xF0) != 0) | > S_02881C_VS_OUT_MISC_VEC_ENA(info->writes_psize || > info->writes_edgeflag || > - info->writes_layer) | > + info->writes_layer || > + info->writes_viewport_index) | > (sctx->queued.named.rasterizer->clip_plane_enable & > clipdist_mask)); > r600_write_context_reg(cs, R_028810_PA_CL_CLIP_CNTL, > @@ -509,20 +511,26 @@ static void si_set_scissor_states(struct 
pipe_context > *ctx, > const struct pipe_scissor_state *state) > { > struct si_context *sctx = (struct si_context *)ctx; > - struct si_state_scissor *scissor = CALLOC_STRUCT(si_state_scissor); > - struct si_pm4_state *pm4 = &scissor->pm4; > - > - if (scissor == NULL) > - return; > + struct si_state_scissor *scissor; > + struct si_pm4_state *pm4; > + int i; > > - scissor->scissor = *state; > - si_pm4_set_reg(pm4, R_028250_PA_SC_VPORT_SCISSOR_0_TL, > - S_028250_TL_X(state->minx) | > S_028250_TL_Y(state->miny) | > - S_028250_WINDOW_OFFSET_DISABLE(1)); > - si_pm4_set_reg(pm4, R_028254_PA_SC_VPORT_SCISSOR_0_BR, > - S_028254_BR_X(state->maxx) | > S_028254_BR_Y(state->maxy)); > + for (i = start_slot; i < start_slot + num_scissors; i++) { > + int idx = i - start_slot; > + int offset = i * 4 * 2; > > - si_pm4_set_state(sctx, scissor, scissor); > + scissor = CALLOC_STRUCT(si_state_scissor); > + if (scissor == NULL) > + return; > + pm4 = &scissor->pm4; > + scissor->scissor = state[idx]; > + si_pm4_set_reg(pm4, R_028250_PA_SC_VPORT_SCISSOR_0_TL + > offset, > + S_028250_TL_X(state[idx].minx) | > S_028250_TL_Y(state[idx].miny) | > + S_028250_WINDOW_OFFSET_DISABLE(1)); > + si_pm4_set_reg(pm4, R_028254_PA_SC_VPORT_SCISSOR_0_BR + > offset, > + S_028254_BR_X(state[idx].maxx) | > S_028254_BR_Y(state[idx].maxy)); > + si_pm4_set_state(sctx, scissor[i], scissor); > + } > } > > static void si_set_viewport_states(struct pipe_context *ctx, > @@ -531,21 +539,29 @@ static void si_set_viewport_states(struct pipe_context > *ctx, > const struct pipe_viewport_state *state) > { > struct si_context *sctx = (struct si_context *)ctx; > - struct si_state_viewport *viewport = CALLOC_STRUCT(si_state_viewport); > - struct si_pm4_state *pm4 = &viewport->pm4; > + struct si_state_viewport *viewport; > + struct si_pm4_state *pm4; > + int i; > > - if (viewport == NULL) > - return; > + for (i = start_slot; i < start_slot + num_viewports; i++) { > + int idx = i - start_slot; > + int offset = i * 4 * 6; > > - 
viewport->viewport = *state; > - si_pm4_set_reg(pm4, R_02843C_PA_CL_VPORT_XSCALE_0, > fui(state->scale[0])); > - si_pm4_set_reg(pm4, R_028440_PA_CL_VPORT_XOFFSET_0, > fui(state->translate[0])); > - si_pm4_set_reg(pm4, R_028444_PA_CL_VPORT_YSCALE_0, > fui(state->scale[1])); > - si_pm4_set_reg(pm4, R_028448_PA_CL_VPORT_YOFFSET_0, > fui(state->translate[1])); > - si_pm4_set_reg(pm4, R_02844C_PA_CL_VPORT_ZSCALE_0, > fui(state->scale[2])); > - si_pm4_set_reg(pm4, R_028450_PA_CL_VPORT_ZOFFSET_0, > fui(state->translate[2])); > + viewport = CALLOC_STRUCT(si_state_viewport); > + if (!viewport) > + return; > + pm4 = &viewport->pm4; > + > + viewport->viewport = state[idx]; > + si_pm4_set_reg(pm4, R_02843C_PA_CL_VPORT_XSCALE_0 + offset, > fui(state[idx].scale[0])); > + si_pm4_set_reg(pm4, R_028440_PA_CL_VPORT_XOFFSET_0 + offset, > fui(state[idx].translate[0])); > + si_pm4_set_reg(pm4, R_028444_PA_CL_VPORT_YSCALE_0 + offset, > fui(state[idx].scale[1])); > + si_pm4_set_reg(pm4, R_028448_PA_CL_VPORT_YOFFSET_0 + offset, > fui(state[idx].translate[1])); > + si_pm4_set_reg(pm4, R_02844C_PA_CL_VPORT_ZSCALE_0 + offset, > fui(state[idx].scale[2])); > + si_pm4_set_reg(pm4, R_028450_PA_CL_VPORT_ZOFFSET_0 + offset, > fui(state[idx].translate[2])); > > - si_pm4_set_state(sctx, viewport, viewport); > + si_pm4_set_state(sctx, viewport[i], viewport); > + } > } > > /* > diff --git a/src/gallium/drivers/radeonsi/si_state.h > b/src/gallium/drivers/radeonsi/si_state.h > index 5e68b16..d1f2dff 100644 > --- a/src/gallium/drivers/radeonsi/si_state.h > +++ b/src/gallium/drivers/radeonsi/si_state.h > @@ -92,8 +92,8 @@ union si_state { > struct si_pm4_state *blend_color; > struct si_pm4_state *clip; > struct si_state_sample_mask *sample_mask; > - struct si_state_scissor *scissor; > - struct si_state_viewport *viewport; > + struct si_state_scissor *scissor[16]; > + struct si_state_viewport *viewport[16]; > struct si_state_rasterizer *rasterizer; > struct si_state_dsa *dsa; > struct si_pm4_state *fb_rs; 
> diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c > b/src/gallium/drivers/radeonsi/si_state_shaders.c > index 208c852..48128fa 100644 > --- a/src/gallium/drivers/radeonsi/si_state_shaders.c > +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c > @@ -187,8 +187,6 @@ static void si_shader_vs(struct si_shader *shader) > case TGSI_SEMANTIC_POSITION: > case TGSI_SEMANTIC_PSIZE: > case TGSI_SEMANTIC_EDGEFLAG: > - case TGSI_SEMANTIC_VIEWPORT_INDEX: > - case TGSI_SEMANTIC_LAYER: > break; > default: > nparams++; > -- > 2.4.3 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev