On Mon, Jan 16, 2017 at 3:00 PM, Marek Olšák <mar...@gmail.com> wrote: > From: Marek Olšák <marek.ol...@amd.com> > > v2: add u_bit_consecutive64 > --- > src/gallium/drivers/radeonsi/si_shader.c | 4 ++-- > src/gallium/drivers/radeonsi/si_shader.h | 4 ++-- > src/gallium/drivers/radeonsi/si_state.c | 6 +++--- > src/gallium/drivers/radeonsi/si_state.h | 2 +- > src/gallium/drivers/radeonsi/si_state_shaders.c | 2 +- > src/util/bitscan.h | 9 +++++++++ > 6 files changed, 18 insertions(+), 9 deletions(-) > > diff --git a/src/gallium/drivers/radeonsi/si_shader.c > b/src/gallium/drivers/radeonsi/si_shader.c > index 6f0f414..dfba9d4 100644 > --- a/src/gallium/drivers/radeonsi/si_shader.c > +++ b/src/gallium/drivers/radeonsi/si_shader.c > @@ -426,21 +426,21 @@ static void declare_input_vs( > "llvm.SI.vs.load.input", ctx->v4f32, args, 3, > LP_FUNC_ATTR_READNONE); > > /* Break up the vec4 into individual components */ > for (chan = 0; chan < 4; chan++) { > LLVMValueRef llvm_chan = lp_build_const_int32(gallivm, chan); > out[chan] = LLVMBuildExtractElement(gallivm->builder, > input, llvm_chan, ""); > } > > - fix_fetch = (ctx->shader->key.mono.vs.fix_fetch >> (2 * input_index)) > & 3; > + fix_fetch = (ctx->shader->key.mono.vs.fix_fetch >> (4 * input_index)) > & 0xf; > if (fix_fetch) { > /* The hardware returns an unsigned value; convert it to a > * signed one. > */ > LLVMValueRef tmp = out[3]; > LLVMValueRef c30 = LLVMConstInt(ctx->i32, 30, 0); > > /* First, recover the sign-extended signed integer value. */ > if (fix_fetch == SI_FIX_FETCH_A2_SSCALED) > tmp = LLVMBuildFPToUI(gallivm->builder, tmp, > ctx->i32, ""); > @@ -6578,21 +6578,21 @@ static void si_dump_shader_key(unsigned shader, > struct si_shader_key *key, > switch (shader) { > case PIPE_SHADER_VERTEX: > fprintf(f, " part.vs.prolog.instance_divisors = {"); > for (i = 0; i < > ARRAY_SIZE(key->part.vs.prolog.instance_divisors); i++) > fprintf(f, !i ? 
"%u" : ", %u", > key->part.vs.prolog.instance_divisors[i]); > fprintf(f, "}\n"); > fprintf(f, " part.vs.epilog.export_prim_id = %u\n", > key->part.vs.epilog.export_prim_id); > fprintf(f, " as_es = %u\n", key->as_es); > fprintf(f, " as_ls = %u\n", key->as_ls); > - fprintf(f, " mono.vs.fix_fetch = 0x%x\n", > key->mono.vs.fix_fetch); > + fprintf(f, " mono.vs.fix_fetch = 0x%"PRIx64"\n", > key->mono.vs.fix_fetch); > break; > > case PIPE_SHADER_TESS_CTRL: > fprintf(f, " part.tcs.epilog.prim_mode = %u\n", > key->part.tcs.epilog.prim_mode); > fprintf(f, " mono.tcs.inputs_to_copy = 0x%"PRIx64"\n", > key->mono.tcs.inputs_to_copy); > break; > > case PIPE_SHADER_TESS_EVAL: > fprintf(f, " part.tes.epilog.export_prim_id = %u\n", > key->part.tes.epilog.export_prim_id); > fprintf(f, " as_es = %u\n", key->as_es); > diff --git a/src/gallium/drivers/radeonsi/si_shader.h > b/src/gallium/drivers/radeonsi/si_shader.h > index 1b5dec2..89f9628 100644 > --- a/src/gallium/drivers/radeonsi/si_shader.h > +++ b/src/gallium/drivers/radeonsi/si_shader.h > @@ -418,22 +418,22 @@ struct si_shader_key { > > /* These two are initially set according to the NEXT_SHADER property, > * or guessed if the property doesn't seem correct. > */ > unsigned as_es:1; /* export shader */ > unsigned as_ls:1; /* local shader */ > > /* Flags for monolithic compilation only. */ > union { > struct { > - /* One pair of bits for every input: SI_FIX_FETCH_* > enums. */ > - uint32_t fix_fetch; > + /* One nibble for every input: SI_FIX_FETCH_* enums. > */ > + uint64_t fix_fetch; > } vs; > struct { > uint64_t inputs_to_copy; /* for fixed-func TCS > */ > } tcs; > } mono; > > /* Optimization flags for asynchronous compilation only. 
*/ > union { > struct { > uint64_t kill_outputs; /* "get_unique_index" > bits */ > diff --git a/src/gallium/drivers/radeonsi/si_state.c > b/src/gallium/drivers/radeonsi/si_state.c > index 6e7d8da..fa78a56 100644 > --- a/src/gallium/drivers/radeonsi/si_state.c > +++ b/src/gallium/drivers/radeonsi/si_state.c > @@ -3356,26 +3356,26 @@ static void *si_create_vertex_elements(struct > pipe_context *ctx, > > S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])) | > S_008F0C_NUM_FORMAT(num_format) | > S_008F0C_DATA_FORMAT(data_format); > v->format_size[i] = desc->block.bits / 8; > > /* The hardware always treats the 2-bit alpha channel as > * unsigned, so a shader workaround is needed. > */ > if (data_format == V_008F0C_BUF_DATA_FORMAT_2_10_10_10) { > if (num_format == V_008F0C_BUF_NUM_FORMAT_SNORM) { > - v->fix_fetch |= SI_FIX_FETCH_A2_SNORM << (2 * > i); > + v->fix_fetch |= > (uint64_t)SI_FIX_FETCH_A2_SNORM << (4 * i); > } else if (num_format == > V_008F0C_BUF_NUM_FORMAT_SSCALED) { > - v->fix_fetch |= SI_FIX_FETCH_A2_SSCALED << (2 > * i); > + v->fix_fetch |= > (uint64_t)SI_FIX_FETCH_A2_SSCALED << (4 * i); > } else if (num_format == > V_008F0C_BUF_NUM_FORMAT_SINT) { > /* This isn't actually used in OpenGL. */ > - v->fix_fetch |= SI_FIX_FETCH_A2_SINT << (2 * > i); > + v->fix_fetch |= > (uint64_t)SI_FIX_FETCH_A2_SINT << (4 * i); > } > } > > /* We work around the fact that 8_8_8 and 16_16_16 data > formats > * do not exist by using the corresponding 4-component > formats. > * This requires a fixup of the descriptor for bounds checks. 
> */ > if (desc->block.bits == 3 * 8 || > desc->block.bits == 3 * 16) { > v->fix_size3 |= (desc->block.bits / 24) << (2 * i); > diff --git a/src/gallium/drivers/radeonsi/si_state.h > b/src/gallium/drivers/radeonsi/si_state.h > index a17dbc7..edc5b93 100644 > --- a/src/gallium/drivers/radeonsi/si_state.h > +++ b/src/gallium/drivers/radeonsi/si_state.h > @@ -92,26 +92,26 @@ struct si_state_dsa { > > struct si_stencil_ref { > struct r600_atom atom; > struct pipe_stencil_ref state; > struct si_dsa_stencil_ref_part dsa_part; > }; > > struct si_vertex_element > { > unsigned count; > - uint32_t fix_fetch; > > /* Two bits per attribute indicating the size of each vector component > * in bytes if the size 3-workaround must be applied. > */ > uint32_t fix_size3; > + uint64_t fix_fetch; > > uint32_t rsrc_word3[SI_MAX_ATTRIBS]; > uint32_t format_size[SI_MAX_ATTRIBS]; > struct pipe_vertex_element elements[SI_MAX_ATTRIBS]; > }; > > union si_state { > struct { > struct si_state_blend *blend; > struct si_state_rasterizer *rasterizer; > diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c > b/src/gallium/drivers/radeonsi/si_state_shaders.c > index 9967837..d2f04bc 100644 > --- a/src/gallium/drivers/radeonsi/si_state_shaders.c > +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c > @@ -927,21 +927,21 @@ static inline void si_shader_selector_key(struct > pipe_context *ctx, > case PIPE_SHADER_VERTEX: > if (sctx->vertex_elements) { > unsigned count = MIN2(sel->info.num_inputs, > sctx->vertex_elements->count); > for (i = 0; i < count; ++i) > key->part.vs.prolog.instance_divisors[i] = > > sctx->vertex_elements->elements[i].instance_divisor; > > key->mono.vs.fix_fetch = > sctx->vertex_elements->fix_fetch & > - u_bit_consecutive(0, 2 * count); > + u_bit_consecutive64(0, 4 * count); > } > if (sctx->tes_shader.cso) > key->as_ls = 1; > else if (sctx->gs_shader.cso) > key->as_es = 1; > else { > si_shader_selector_key_hw_vs(sctx, sel, key); > > if (sctx->ps_shader.cso && > 
sctx->ps_shader.cso->info.uses_primid) > key->part.vs.epilog.export_prim_id = 1; > diff --git a/src/util/bitscan.h b/src/util/bitscan.h > index a5dfa1f..4f8b608 100644 > --- a/src/util/bitscan.h > +++ b/src/util/bitscan.h > @@ -219,16 +219,25 @@ util_last_bit_signed(int i) > */ > static inline unsigned > u_bit_consecutive(unsigned start, unsigned count) > { > assert(start + count <= 32); > if (count == 32) > return ~0; > return ((1u << count) - 1) << start; > } > > +static inline unsigned > +u_bit_consecutive64(unsigned start, unsigned count)
And this should return uint64_t, not unsigned; as declared, the 64-bit mask is narrowed to the width of unsigned on return, so the upper bits are lost. Marek > +{ > + assert(start + count <= 64); > + if (count == 64) > + return ~(uint64_t)0; > + return (((uint64_t)1 << count) - 1) << start; > +} > + > > #ifdef __cplusplus > } > #endif > > #endif /* BITSCAN_H */ > -- > 2.7.4 > _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev