Hi Brian, On Friday, August 05, 2016 09:27:34 Brian Paul wrote: > > As requested with the initial creation of util/bitscan.h > > now move other bitscan related functions into util. > > Make use of win32 intrinsics for util_last_bit/fls if present. > > > > Signed-off-by: Mathias Fröhlich <mathias.froehl...@web.de> > > > > > > Any testing, especially on win32, is appreciated. > > We need to include "c99_compat.h" in u_bitcast.h in order to get the > "inline" macro.
Confused now: The file src/util/bitscan.h already contains the mentioned include, so I think the patch should safely have 'inline' available wherever it needs it? Oh, I see: you tested that for me, and it failed on win32 because of the recently introduced u_bitcast.h, which does not include c99_compat.h - right? best Mathias > > -Brian > > > > > > Please review > > > > Mathias > > > > > > --- > > src/compiler/glsl/glsl_to_nir.cpp | 2 +- > > src/gallium/auxiliary/util/u_math.h | 64 ------------------ > > src/mesa/drivers/dri/i965/brw_cs.c | 2 +- > > src/mesa/drivers/dri/i965/brw_draw.c | 10 +-- > > src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 6 +- > > src/mesa/drivers/dri/i965/brw_program.c | 2 +- > > src/mesa/drivers/dri/i965/brw_shader.cpp | 2 +- > > src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp | 2 +- > > src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 2 +- > > src/mesa/main/imports.h | 45 ------------- > > src/mesa/program/prog_to_nir.c | 6 +- > > src/util/bitscan.h | 80 > > +++++++++++++++++++++++ > > 12 files changed, 97 insertions(+), 126 deletions(-) > > > > diff --git a/src/compiler/glsl/glsl_to_nir.cpp > > b/src/compiler/glsl/glsl_to_nir.cpp > > index 20302e3..d3cc5b4 100644 > > --- a/src/compiler/glsl/glsl_to_nir.cpp > > +++ b/src/compiler/glsl/glsl_to_nir.cpp > > @@ -146,7 +146,7 @@ glsl_to_nir(const struct gl_shader_program *shader_prog, > > shader->info.name = ralloc_asprintf(shader, "GLSL%d", > > shader_prog->Name); > > if (shader_prog->Label) > > shader->info.label = ralloc_strdup(shader, shader_prog->Label); > > - shader->info.num_textures = _mesa_fls(sh->Program->SamplersUsed); > > + shader->info.num_textures = util_last_bit(sh->Program->SamplersUsed); > > shader->info.num_ubos = sh->NumUniformBlocks; > > shader->info.num_abos = shader_prog->NumAtomicBuffers; > > shader->info.num_ssbos = sh->NumShaderStorageBlocks; > > diff --git a/src/gallium/auxiliary/util/u_math.h > > b/src/gallium/auxiliary/util/u_math.h > > index 1661e63..a923271 100644 > > --- 
a/src/gallium/auxiliary/util/u_math.h > > +++ b/src/gallium/auxiliary/util/u_math.h > > @@ -347,70 +347,6 @@ util_half_inf_sign(int16_t x) > > > > > > /** > > - * Find last bit set in a word. The least significant bit is 1. > > - * Return 0 if no bits are set. > > - */ > > -static inline unsigned > > -util_last_bit(unsigned u) > > -{ > > -#if defined(HAVE___BUILTIN_CLZ) > > - return u == 0 ? 0 : 32 - __builtin_clz(u); > > -#else > > - unsigned r = 0; > > - while (u) { > > - r++; > > - u >>= 1; > > - } > > - return r; > > -#endif > > -} > > - > > -/** > > - * Find last bit set in a word. The least significant bit is 1. > > - * Return 0 if no bits are set. > > - */ > > -static inline unsigned > > -util_last_bit64(uint64_t u) > > -{ > > -#if defined(HAVE___BUILTIN_CLZLL) > > - return u == 0 ? 0 : 64 - __builtin_clzll(u); > > -#else > > - unsigned r = 0; > > - while (u) { > > - r++; > > - u >>= 1; > > - } > > - return r; > > -#endif > > -} > > - > > -/** > > - * Find last bit in a word that does not match the sign bit. The least > > - * significant bit is 1. > > - * Return 0 if no bits are set. > > - */ > > -static inline unsigned > > -util_last_bit_signed(int i) > > -{ > > - if (i >= 0) > > - return util_last_bit(i); > > - else > > - return util_last_bit(~(unsigned)i); > > -} > > - > > -/* Returns a bitfield in which the first count bits starting at start are > > - * set. > > - */ > > -static inline unsigned > > -u_bit_consecutive(unsigned start, unsigned count) > > -{ > > - assert(start + count <= 32); > > - if (count == 32) > > - return ~0; > > - return ((1u << count) - 1) << start; > > -} > > - > > -/** > > * Return float bits. 
> > */ > > static inline unsigned > > diff --git a/src/mesa/drivers/dri/i965/brw_cs.c > > b/src/mesa/drivers/dri/i965/brw_cs.c > > index 655adc1..6685acd 100644 > > --- a/src/mesa/drivers/dri/i965/brw_cs.c > > +++ b/src/mesa/drivers/dri/i965/brw_cs.c > > @@ -220,7 +220,7 @@ brw_upload_cs_prog(struct brw_context *brw) > > return; > > > > brw->cs.base.sampler_count = > > - _mesa_fls(ctx->ComputeProgram._Current->Base.SamplersUsed); > > + util_last_bit(ctx->ComputeProgram._Current->Base.SamplersUsed); > > > > brw_cs_populate_key(brw, &key); > > > > diff --git a/src/mesa/drivers/dri/i965/brw_draw.c > > b/src/mesa/drivers/dri/i965/brw_draw.c > > index d7a1ba3..9b1e18c 100644 > > --- a/src/mesa/drivers/dri/i965/brw_draw.c > > +++ b/src/mesa/drivers/dri/i965/brw_draw.c > > @@ -452,15 +452,15 @@ brw_try_draw_prims(struct gl_context *ctx, > > * index. > > */ > > brw->wm.base.sampler_count = > > - _mesa_fls(ctx->FragmentProgram._Current->Base.SamplersUsed); > > + util_last_bit(ctx->FragmentProgram._Current->Base.SamplersUsed); > > brw->gs.base.sampler_count = ctx->GeometryProgram._Current ? > > - _mesa_fls(ctx->GeometryProgram._Current->Base.SamplersUsed) : 0; > > + util_last_bit(ctx->GeometryProgram._Current->Base.SamplersUsed) : 0; > > brw->tes.base.sampler_count = ctx->TessEvalProgram._Current ? > > - _mesa_fls(ctx->TessEvalProgram._Current->Base.SamplersUsed) : 0; > > + util_last_bit(ctx->TessEvalProgram._Current->Base.SamplersUsed) : 0; > > brw->tcs.base.sampler_count = ctx->TessCtrlProgram._Current ? 
> > - _mesa_fls(ctx->TessCtrlProgram._Current->Base.SamplersUsed) : 0; > > + util_last_bit(ctx->TessCtrlProgram._Current->Base.SamplersUsed) : 0; > > brw->vs.base.sampler_count = > > - _mesa_fls(ctx->VertexProgram._Current->Base.SamplersUsed); > > + util_last_bit(ctx->VertexProgram._Current->Base.SamplersUsed); > > > > intel_prepare_render(brw); > > brw_predraw_set_aux_buffers(brw); > > diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > > b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > > index c1f413b..117eabe 100644 > > --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > > +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > > @@ -1845,7 +1845,7 @@ fs_visitor::emit_gs_control_data_bits(const fs_reg > > &vertex_count) > > fs_reg prev_count = bld.vgrf(BRW_REGISTER_TYPE_UD, 1); > > abld.ADD(prev_count, vertex_count, brw_imm_ud(0xffffffffu)); > > unsigned log2_bits_per_vertex = > > - _mesa_fls(gs_compile->control_data_bits_per_vertex); > > + util_last_bit(gs_compile->control_data_bits_per_vertex); > > abld.SHR(dword_index, prev_count, brw_imm_ud(6u - > > log2_bits_per_vertex)); > > > > if (per_slot_offset.file != BAD_FILE) { > > @@ -2789,7 +2789,7 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder > > &bld, > > if (mask == 0) > > break; > > > > - unsigned num_components = _mesa_fls(mask); > > + unsigned num_components = util_last_bit(mask); > > enum opcode opcode; > > > > /* We can only pack two 64-bit components in a single message, so > > send > > @@ -4547,7 +4547,7 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, > > nir_tex_instr *instr) > > nir_ssa_def_components_read(&instr->dest.ssa): > > (1 << dest_size) - 1; > > assert(write_mask != 0); /* dead code should have been eliminated */ > > - inst->regs_written = _mesa_fls(write_mask) * dispatch_width / 8; > > + inst->regs_written = util_last_bit(write_mask) * dispatch_width / 8; > > } else { > > inst->regs_written = 4 * dispatch_width / 8; > > } > > diff --git a/src/mesa/drivers/dri/i965/brw_program.c > > 
b/src/mesa/drivers/dri/i965/brw_program.c > > index 7785490..0e55c7b 100644 > > --- a/src/mesa/drivers/dri/i965/brw_program.c > > +++ b/src/mesa/drivers/dri/i965/brw_program.c > > @@ -674,7 +674,7 @@ brw_setup_tex_for_precompile(struct brw_context *brw, > > struct gl_program *prog) > > { > > const bool has_shader_channel_select = brw->is_haswell || brw->gen >= > > 8; > > - unsigned sampler_count = _mesa_fls(prog->SamplersUsed); > > + unsigned sampler_count = util_last_bit(prog->SamplersUsed); > > for (unsigned i = 0; i < sampler_count; i++) { > > if (!has_shader_channel_select && (prog->ShadowSamplers & (1 << > > i))) { > > /* Assume DEPTH_TEXTURE_MODE is the default: X, X, X, 1 */ > > diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp > > b/src/mesa/drivers/dri/i965/brw_shader.cpp > > index 559e44c..62bad9b 100644 > > --- a/src/mesa/drivers/dri/i965/brw_shader.cpp > > +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp > > @@ -1157,7 +1157,7 @@ > > brw_assign_common_binding_table_offsets(gl_shader_stage stage, > > uint32_t > > next_binding_table_offset) > > { > > const struct gl_linked_shader *shader = NULL; > > - int num_textures = _mesa_fls(prog->SamplersUsed); > > + int num_textures = util_last_bit(prog->SamplersUsed); > > > > if (shader_prog) > > shader = shader_prog->_LinkedShaders[stage]; > > diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp > > b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp > > index 927438f..c5886d4 100644 > > --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp > > +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp > > @@ -334,7 +334,7 @@ vec4_gs_visitor::emit_control_data_bits() > > emit(ADD(dst_reg(prev_count), this->vertex_count, > > brw_imm_ud(0xffffffffu))); > > unsigned log2_bits_per_vertex = > > - _mesa_fls(c->control_data_bits_per_vertex); > > + util_last_bit(c->control_data_bits_per_vertex); > > emit(SHR(dst_reg(dword_index), prev_count, > > brw_imm_ud(6 - log2_bits_per_vertex))); > > } > > diff --git 
a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c > > b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c > > index 9bee7dd..a53f9da 100644 > > --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c > > +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c > > @@ -998,7 +998,7 @@ update_stage_texture_surfaces(struct brw_context *brw, > > else > > surf_offset += > > stage_state->prog_data->binding_table.plane_start[plane]; > > > > - unsigned num_samplers = _mesa_fls(prog->SamplersUsed); > > + unsigned num_samplers = util_last_bit(prog->SamplersUsed); > > for (unsigned s = 0; s < num_samplers; s++) { > > surf_offset[s] = 0; > > > > diff --git a/src/mesa/main/imports.h b/src/mesa/main/imports.h > > index 05cc5ca..21bcb10 100644 > > --- a/src/mesa/main/imports.h > > +++ b/src/mesa/main/imports.h > > @@ -339,51 +339,6 @@ extern unsigned int > > _mesa_bitcount_64(uint64_t n); > > #endif > > > > -/** > > - * Find the last (most significant) bit set in a word. > > - * > > - * Essentially ffs() in the reverse direction. > > - */ > > -static inline unsigned int > > -_mesa_fls(unsigned int n) > > -{ > > -#ifdef HAVE___BUILTIN_CLZ > > - return n == 0 ? 0 : 32 - __builtin_clz(n); > > -#else > > - unsigned int v = 1; > > - > > - if (n == 0) > > - return 0; > > - > > - while (n >>= 1) > > - v++; > > - > > - return v; > > -#endif > > -} > > - > > -/** > > - * Find the last (most significant) bit set in a uint64_t value. > > - * > > - * Essentially ffsll() in the reverse direction. > > - */ > > -static inline unsigned int > > -_mesa_flsll(uint64_t n) > > -{ > > -#ifdef HAVE___BUILTIN_CLZLL > > - return n == 0 ? 
0 : 64 - __builtin_clzll(n); > > -#else > > - unsigned int v = 1; > > - > > - if (n == 0) > > - return 0; > > - > > - while (n >>= 1) > > - v++; > > - > > - return v; > > -#endif > > -} > > > > static inline bool > > _mesa_half_is_negative(GLhalfARB h) > > diff --git a/src/mesa/program/prog_to_nir.c b/src/mesa/program/prog_to_nir.c > > index 9e01151..1efd1a1 100644 > > --- a/src/mesa/program/prog_to_nir.c > > +++ b/src/mesa/program/prog_to_nir.c > > @@ -887,7 +887,7 @@ setup_registers_and_variables(struct ptn_compile *c) > > struct nir_shader *shader = b->shader; > > > > /* Create input variables. */ > > - const int num_inputs = _mesa_flsll(c->prog->InputsRead); > > + const int num_inputs = util_last_bit64(c->prog->InputsRead); > > for (int i = 0; i < num_inputs; i++) { > > if (!(c->prog->InputsRead & BITFIELD64_BIT(i))) > > continue; > > @@ -948,7 +948,7 @@ setup_registers_and_variables(struct ptn_compile *c) > > } > > > > /* Create output registers and variables. */ > > - int max_outputs = _mesa_fls(c->prog->OutputsWritten); > > + int max_outputs = util_last_bit(c->prog->OutputsWritten); > > c->output_regs = rzalloc_array(c, nir_register *, max_outputs); > > > > for (int i = 0; i < max_outputs; i++) { > > @@ -1043,7 +1043,7 @@ prog_to_nir(const struct gl_program *prog, > > ptn_add_output_stores(c); > > > > s->info.name = ralloc_asprintf(s, "ARB%d", prog->Id); > > - s->info.num_textures = _mesa_fls(prog->SamplersUsed); > > + s->info.num_textures = util_last_bit(prog->SamplersUsed); > > s->info.num_ubos = 0; > > s->info.num_abos = 0; > > s->info.num_ssbos = 0; > > diff --git a/src/util/bitscan.h b/src/util/bitscan.h > > index 4999b74..a5bb34e 100644 > > --- a/src/util/bitscan.h > > +++ b/src/util/bitscan.h > > @@ -29,6 +29,7 @@ > > #ifndef BITSCAN_H > > #define BITSCAN_H > > > > +#include <assert.h> > > #include <stdint.h> > > > > #if defined(_MSC_VER) > > @@ -146,6 +147,85 @@ u_bit_scan_consecutive_range64(uint64_t *mask, int > > *start, int *count) > > } > > > > 
> > +/** > > + * Find last bit set in a word. The least significant bit is 1. > > + * Return 0 if no bits are set. > > + * Essentially ffs() in the reverse direction. > > + */ > > +static inline unsigned > > +util_last_bit(unsigned u) > > +{ > > +#if defined(HAVE___BUILTIN_CLZ) > > + return u == 0 ? 0 : 32 - __builtin_clz(u); > > +#elif defined(_MSC_VER) && (_M_IX86 || _M_ARM || _M_AMD64 || _M_IA64) > > + unsigned long index; > > + if (_BitScanReverse(&index, u)) > > + return index; > > + else > > + return 0; > > +#else > > + unsigned r = 0; > > + while (u) { > > + r++; > > + u >>= 1; > > + } > > + return r; > > +#endif > > +} > > + > > +/** > > + * Find last bit set in a word. The least significant bit is 1. > > + * Return 0 if no bits are set. > > + * Essentially ffsll() in the reverse direction. > > + */ > > +static inline unsigned > > +util_last_bit64(uint64_t u) > > +{ > > +#if defined(HAVE___BUILTIN_CLZLL) > > + return u == 0 ? 0 : 64 - __builtin_clzll(u); > > +#elif defined(_MSC_VER) && (_M_AMD64 || _M_ARM || _M_IA64) > > + unsigned long index; > > + if (_BitScanReverse64(&index, u)) > > + return index; > > + else > > + return 0; > > +#else > > + unsigned r = 0; > > + while (u) { > > + r++; > > + u >>= 1; > > + } > > + return r; > > +#endif > > +} > > + > > +/** > > + * Find last bit in a word that does not match the sign bit. The least > > + * significant bit is 1. > > + * Return 0 if no bits are set. > > + */ > > +static inline unsigned > > +util_last_bit_signed(int i) > > +{ > > + if (i >= 0) > > + return util_last_bit(i); > > + else > > + return util_last_bit(~(unsigned)i); > > +} > > + > > +/* Returns a bitfield in which the first count bits starting at start are > > + * set. 
> > + */ > > +static inline unsigned > > +u_bit_consecutive(unsigned start, unsigned count) > > +{ > > + assert(start + count <= 32); > > + if (count == 32) > > + return ~0; > > + return ((1u << count) - 1) << start; > > +} > > + > > + > > #ifdef __cplusplus > > } > > #endif > > > _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev