On Mon, Oct 31, 2016 at 11:11 PM, Nicolai Hähnle <nhaeh...@gmail.com> wrote: > From: Nicolai Hähnle <nicolai.haeh...@amd.com> > > Fixes GL45-CTS.geometry_shader.adjacency.adjacency_indiced_triangle_strip and > others. > > This leaves the case of triangle strips with adjacency and primitive restarts > open. It seems that the only thing that cares about that is a piglit test. > Fixing this efficiently would be really involved, and I don't want to use the > hammer of degrading to software handling of indices because there may well > be software that uses this draw mode (without caring about the precise > rotation of triangles). > --- > src/gallium/drivers/radeonsi/si_pipe.c | 1 + > src/gallium/drivers/radeonsi/si_pipe.h | 2 + > src/gallium/drivers/radeonsi/si_shader.c | 112 > ++++++++++++++++++++++++ > src/gallium/drivers/radeonsi/si_shader.h | 10 +++ > src/gallium/drivers/radeonsi/si_state_draw.c | 18 ++++ > src/gallium/drivers/radeonsi/si_state_shaders.c | 7 +- > 6 files changed, 146 insertions(+), 4 deletions(-) > > diff --git a/src/gallium/drivers/radeonsi/si_pipe.c > b/src/gallium/drivers/radeonsi/si_pipe.c > index bf3b442..bc633bb 100644 > --- a/src/gallium/drivers/radeonsi/si_pipe.c > +++ b/src/gallium/drivers/radeonsi/si_pipe.c > @@ -665,20 +665,21 @@ static int si_get_shader_param(struct pipe_screen* > pscreen, unsigned shader, enu > return 0; > } > > static void si_destroy_screen(struct pipe_screen* pscreen) > { > struct si_screen *sscreen = (struct si_screen *)pscreen; > struct si_shader_part *parts[] = { > sscreen->vs_prologs, > sscreen->vs_epilogs, > sscreen->tcs_epilogs, > + sscreen->gs_prologs, > sscreen->ps_prologs, > sscreen->ps_epilogs > }; > unsigned i; > > if (!sscreen) > return; > > if (!sscreen->b.ws->unref(sscreen->b.ws)) > return; > diff --git a/src/gallium/drivers/radeonsi/si_pipe.h > b/src/gallium/drivers/radeonsi/si_pipe.h > index e7617bc..8e6a94d 100644 > --- a/src/gallium/drivers/radeonsi/si_pipe.h > +++ 
b/src/gallium/drivers/radeonsi/si_pipe.h > @@ -89,20 +89,21 @@ struct si_screen { > bool has_ds_bpermute; > > /* Whether shaders are monolithic (1-part) or separate (3-part). */ > bool use_monolithic_shaders; > bool record_llvm_ir; > > pipe_mutex shader_parts_mutex; > struct si_shader_part *vs_prologs; > struct si_shader_part *vs_epilogs; > struct si_shader_part *tcs_epilogs; > + struct si_shader_part *gs_prologs; > struct si_shader_part *ps_prologs; > struct si_shader_part *ps_epilogs; > > /* Shader cache in memory. > * > * Design & limitations: > * - The shader cache is per screen (= per process), never saved to > * disk, and skips redundant shader compilations from TGSI to > bytecode. > * - It can only be used with one-variant-per-shader support, in which > * case only the main (typically middle) part of shaders is cached. > @@ -312,20 +313,21 @@ struct si_context { > int last_sh_base_reg; > int last_primitive_restart_en; > int last_restart_index; > int last_gs_out_prim; > int last_prim; > int last_multi_vgt_param; > int last_rast_prim; > unsigned last_sc_line_stipple; > int last_vtx_reuse_depth; > int current_rast_prim; /* primitive type after > TES, GS */ > + bool gs_tri_strip_adj_fix; > unsigned last_gsvs_itemsize; > > /* Scratch buffer */ > struct r600_resource *scratch_buffer; > bool emit_scratch_reloc; > unsigned scratch_waves; > unsigned spi_tmpring_size; > > struct r600_resource *compute_scratch_buffer; > > diff --git a/src/gallium/drivers/radeonsi/si_shader.c > b/src/gallium/drivers/radeonsi/si_shader.c > index fe15420..9141d62 100644 > --- a/src/gallium/drivers/radeonsi/si_shader.c > +++ b/src/gallium/drivers/radeonsi/si_shader.c > @@ -6740,20 +6740,92 @@ static void si_get_ps_epilog_key(struct si_shader > *shader, > struct tgsi_shader_info *info = &shader->selector->info; > memset(key, 0, sizeof(*key)); > key->ps_epilog.colors_written = info->colors_written; > key->ps_epilog.writes_z = info->writes_z; > key->ps_epilog.writes_stencil = 
info->writes_stencil; > key->ps_epilog.writes_samplemask = info->writes_samplemask; > key->ps_epilog.states = shader->key.ps.epilog; > } > > /** > + * Build the GS prolog function. Rotate the input vertices for triangle > strips > + * with adjacency. > + */ > +static void si_build_gs_prolog_function(struct si_shader_context *ctx, > + union si_shader_part_key *key) > +{ > + const unsigned num_sgprs = SI_GS_NUM_USER_SGPR + 2; > + const unsigned num_vgprs = 8; > + struct gallivm_state *gallivm = &ctx->gallivm; > + LLVMBuilderRef builder = gallivm->builder; > + LLVMTypeRef params[32]; > + LLVMTypeRef returns[32]; > + LLVMValueRef func, ret; > + > + for (unsigned i = 0; i < num_sgprs; ++i) { > + params[i] = ctx->i32; > + returns[i] = ctx->i32; > + } > + > + for (unsigned i = 0; i < num_vgprs; ++i) { > + params[num_sgprs + i] = ctx->i32; > + returns[num_sgprs + i] = ctx->f32; > + } > + > + /* Create the function. */ > + si_create_function(ctx, "gs_prolog", returns, num_sgprs + num_vgprs, > + params, num_sgprs + num_vgprs, num_sgprs - 1); > + func = ctx->main_fn; > + > + /* Copy inputs to outputs. This should be no-op, as the registers > match, > + * but it will prevent the compiler from overwriting them > unintentionally. > + */ > + ret = ctx->return_value; > + for (unsigned i = 0; i < num_sgprs; i++) { > + LLVMValueRef p = LLVMGetParam(func, i); > + ret = LLVMBuildInsertValue(builder, ret, p, i, ""); > + } > + for (unsigned i = 0; i < num_vgprs; i++) { > + LLVMValueRef p = LLVMGetParam(func, num_sgprs + i); > + p = LLVMBuildBitCast(builder, p, ctx->f32, ""); > + ret = LLVMBuildInsertValue(builder, ret, p, num_sgprs + i, > ""); > + } > + > + if (key->gs_prolog.states.tri_strip_adj_fix) { > + /* Remap the input vertices for every other primitive. 
*/ > + const unsigned vtx_params[6] = { > + num_sgprs, > + num_sgprs + 1, > + num_sgprs + 3, > + num_sgprs + 4, > + num_sgprs + 5, > + num_sgprs + 6 > + }; > + LLVMValueRef prim_id, rotate; > + > + prim_id = LLVMGetParam(func, num_sgprs + 2); > + rotate = LLVMBuildTrunc(builder, prim_id, ctx->i1, ""); > + > + for (unsigned i = 0; i < 6; ++i) { > + LLVMValueRef base, rotated, actual; > + base = LLVMGetParam(func, vtx_params[i]); > + rotated = LLVMGetParam(func, vtx_params[(i + 4) % 6]); > + actual = LLVMBuildSelect(builder, rotate, rotated, > base, ""); > + actual = LLVMBuildBitCast(builder, actual, ctx->f32, > ""); > + ret = LLVMBuildInsertValue(builder, ret, actual, > vtx_params[i], ""); > + } > + } > + > + LLVMBuildRet(builder, ret); > +} > + > +/** > * Given a list of shader part functions, build a wrapper function that > * runs them in sequence to form a monolithic shader. > */ > static void si_build_wrapper_function(struct si_shader_context *ctx, > LLVMValueRef *parts, > unsigned num_parts, > unsigned main_part) > { > struct gallivm_state *gallivm = &ctx->gallivm; > LLVMBuilderRef builder = ctx->gallivm.builder; > @@ -7012,20 +7084,32 @@ int si_compile_tgsi_shader(struct si_screen *sscreen, > LLVMValueRef parts[2]; > union si_shader_part_key epilog_key; > > parts[0] = ctx.main_fn; > > si_get_vs_epilog_key(shader, &shader->key.tes.epilog, > &epilog_key); > si_build_vs_epilog_function(&ctx, &epilog_key); > parts[1] = ctx.main_fn; > > si_build_wrapper_function(&ctx, parts, 2, 0); > + } else if (is_monolithic && ctx.type == PIPE_SHADER_GEOMETRY) { > + LLVMValueRef parts[2]; > + union si_shader_part_key prolog_key; > + > + parts[1] = ctx.main_fn; > + > + memset(&prolog_key, 0, sizeof(prolog_key)); > + prolog_key.gs_prolog.states = shader->key.gs.prolog; > + si_build_gs_prolog_function(&ctx, &prolog_key); > + parts[0] = ctx.main_fn; > + > + si_build_wrapper_function(&ctx, parts, 2, 1); > } else if (is_monolithic && ctx.type == PIPE_SHADER_FRAGMENT) { > LLVMValueRef 
parts[3]; > union si_shader_part_key prolog_key; > union si_shader_part_key epilog_key; > bool need_prolog; > > si_get_ps_prolog_key(shader, &prolog_key, false); > need_prolog = si_need_ps_prolog(&prolog_key); > > parts[need_prolog ? 1 : 0] = ctx.main_fn; > @@ -7200,20 +7284,23 @@ si_get_shader_part(struct si_screen *sscreen, > si_init_shader_ctx(&ctx, sscreen, &shader, tm); > ctx.type = type; > > switch (type) { > case PIPE_SHADER_VERTEX: > break; > case PIPE_SHADER_TESS_CTRL: > assert(!prolog); > shader.key.tcs.epilog = key->tcs_epilog.states; > break; > + case PIPE_SHADER_GEOMETRY: > + assert(prolog); > + break; > case PIPE_SHADER_FRAGMENT: > if (prolog) > shader.key.ps.prolog = key->ps_prolog.states; > else > shader.key.ps.epilog = key->ps_epilog.states; > break; > default: > unreachable("bad shader part"); > } > > @@ -7524,20 +7611,41 @@ static bool si_shader_select_tcs_parts(struct > si_screen *sscreen, > > shader->epilog = si_get_shader_part(sscreen, &sscreen->tcs_epilogs, > PIPE_SHADER_TESS_CTRL, false, > &epilog_key, tm, debug, > si_build_tcs_epilog_function, > "Tessellation Control Shader > Epilog"); > return shader->epilog != NULL; > } > > /** > + * Select and compile (or reuse) GS parts (prolog). > + */ > +static bool si_shader_select_gs_parts(struct si_screen *sscreen, > + LLVMTargetMachineRef tm, > + struct si_shader *shader, > + struct pipe_debug_callback *debug) > +{ > + union si_shader_part_key prolog_key; > + > + memset(&prolog_key, 0, sizeof(prolog_key)); > + prolog_key.gs_prolog.states = shader->key.gs.prolog; > +
If the workaround is disabled, you can return early here and skip building an empty prolog. > + shader->prolog = si_get_shader_part(sscreen, &sscreen->gs_prologs, > + PIPE_SHADER_GEOMETRY, true, > + &prolog_key, tm, debug, > + si_build_gs_prolog_function, > + "Geometry Shader Prolog"); > + return shader->prolog != NULL; > +} > + > +/** > * Build the pixel shader prolog function. This handles: > * - two-side color selection and interpolation > * - overriding interpolation parameters for the API PS > * - polygon stippling > * > * All preloaded SGPRs and VGPRs are passed through unmodified unless they > are > * overriden by other states. (e.g. per-sample interpolation) > * Interpolated colors are stored after the preloaded VGPRs. > */ > static void si_build_ps_prolog_function(struct si_shader_context *ctx, > @@ -8040,20 +8148,24 @@ int si_shader_create(struct si_screen *sscreen, > LLVMTargetMachineRef tm, > return -1; > break; > case PIPE_SHADER_TESS_CTRL: > if (!si_shader_select_tcs_parts(sscreen, tm, shader, > debug)) > return -1; > break; > case PIPE_SHADER_TESS_EVAL: > if (!si_shader_select_tes_parts(sscreen, tm, shader, > debug)) > return -1; > break; > + case PIPE_SHADER_GEOMETRY: > + if (!si_shader_select_gs_parts(sscreen, tm, shader, > debug)) > + return - 1; Unnecessary space between "-" and "1"; this should be "return -1;" like the other cases. > + break; > case PIPE_SHADER_FRAGMENT: > if (!si_shader_select_ps_parts(sscreen, tm, shader, > debug)) > return -1; > > /* Make sure we have at least as many VGPRs as there > * are allocated inputs. 
> */ > shader->config.num_vgprs = > MAX2(shader->config.num_vgprs, > > shader->info.num_input_vgprs); > break; > diff --git a/src/gallium/drivers/radeonsi/si_shader.h > b/src/gallium/drivers/radeonsi/si_shader.h > index 91f9cbf..d8ab2a4 100644 > --- a/src/gallium/drivers/radeonsi/si_shader.h > +++ b/src/gallium/drivers/radeonsi/si_shader.h > @@ -318,20 +318,24 @@ struct si_vs_epilog_bits { > * if PS doesn't read them > */ > }; > > /* Common TCS bits between the shader key and the epilog key. */ > struct si_tcs_epilog_bits { > unsigned prim_mode:3; > uint64_t inputs_to_copy; > }; > > +struct si_gs_prolog_bits { > + unsigned tri_strip_adj_fix:1; > +}; > + > /* Common PS bits between the shader key and the prolog key. */ > struct si_ps_prolog_bits { > unsigned color_two_side:1; > unsigned flatshade_colors:1; > unsigned poly_stipple:1; > unsigned force_persp_sample_interp:1; > unsigned force_linear_sample_interp:1; > unsigned force_persp_center_interp:1; > unsigned force_linear_center_interp:1; > unsigned bc_optimize_for_persp:1; > @@ -356,20 +360,23 @@ union si_shader_part_key { > unsigned last_input:4; > } vs_prolog; > struct { > struct si_vs_epilog_bits states; > unsigned prim_id_param_offset:5; > } vs_epilog; > struct { > struct si_tcs_epilog_bits states; > } tcs_epilog; > struct { > + struct si_gs_prolog_bits states; > + } gs_prolog; > + struct { > struct si_ps_prolog_bits states; > unsigned num_input_sgprs:5; > unsigned num_input_vgprs:5; > /* Color interpolation and two-side color selection. 
*/ > unsigned colors_read:8; /* color input components read > */ > unsigned num_interp_inputs:5; /* BCOLOR is at this > location */ > unsigned face_vgpr_index:5; > unsigned wqm:1; > char color_attr_index[2]; > char color_interp_vgpr_index[2]; /* -1 == constant > */ > @@ -394,20 +401,23 @@ union si_shader_key { > unsigned as_es:1; /* export shader */ > unsigned as_ls:1; /* local shader */ > } vs; > struct { > struct si_tcs_epilog_bits epilog; > } tcs; /* tessellation control shader */ > struct { > struct si_vs_epilog_bits epilog; /* same as VS */ > unsigned as_es:1; /* export shader */ > } tes; /* tessellation evaluation shader */ > + struct { > + struct si_gs_prolog_bits prolog; > + } gs; > }; > > struct si_shader_config { > unsigned num_sgprs; > unsigned num_vgprs; > unsigned spilled_sgprs; > unsigned spilled_vgprs; > unsigned lds_size; > unsigned spi_ps_input_ena; > unsigned spi_ps_input_addr; > diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c > b/src/gallium/drivers/radeonsi/si_state_draw.c > index c0e2642..b934100 100644 > --- a/src/gallium/drivers/radeonsi/si_state_draw.c > +++ b/src/gallium/drivers/radeonsi/si_state_draw.c > @@ -975,20 +975,38 @@ void si_draw_vbo(struct pipe_context *ctx, const struct > pipe_draw_info *info) > else if (sctx->tes_shader.cso) > rast_prim = > sctx->tes_shader.cso->info.properties[TGSI_PROPERTY_TES_PRIM_MODE]; > else > rast_prim = info->mode; > > if (rast_prim != sctx->current_rast_prim) { > sctx->current_rast_prim = rast_prim; > sctx->do_update_shaders = true; > } > > + if (sctx->gs_shader.cso) { > + /* Determine whether the GS triangle strip adjacency fix > should > + * be applied. Rotate every other triangle if > + * - triangle strips with adjacency are fed to the GS and > + * - primitive restart is disabled (the rotation doesn't help > + * when the restart occurs after an odd number of > triangles). 
> + */ > + bool gs_tri_strip_adj_fix = > + !sctx->tcs_shader.cso && !sctx->tes_shader.cso && No need to check tcs_shader: tessellation is enabled if and only if tes_shader is set. With all the above fixed, the series is: Reviewed-by: Marek Olšák <marek.ol...@amd.com> Marek _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev