Re: [Mesa-dev] [PATCH 1/8] util: Move util_is_power_of_two to bitscan.h and rename to util_is_power_of_two_or_zero

Alejandro Piñeiro Tue, 27 Mar 2018 03:10:37 -0700

Reviewed-by: Alejandro Piñeiro <apinhe...@igalia.com>


On 27/03/18 04:00, Ian Romanick wrote:
> From: Ian Romanick <ian.d.roman...@intel.com>
>
> The new name makes the zero-input behavior more obvious.  The next
> patch adds a new function with different zero-input behavior.
>
> Signed-off-by: Ian Romanick <ian.d.roman...@intel.com>
> Suggested-by: Matt Turner <matts...@gmail.com>
> ---
>  src/amd/common/ac_gpu_info.c                             |  4 ++--
>  src/amd/common/ac_surface.c                              |  2 +-
>  src/amd/vulkan/radv_formats.c                            |  4 ++--
>  src/broadcom/compiler/nir_to_vir.c                       |  4 ++--
>  src/gallium/auxiliary/gallivm/lp_bld_arit.c              |  2 +-
>  src/gallium/auxiliary/gallivm/lp_bld_debug.cpp           |  2 +-
>  src/gallium/auxiliary/gallivm/lp_bld_format_aos.c        |  4 ++--
>  src/gallium/auxiliary/gallivm/lp_bld_gather.c            |  8 ++++----
>  src/gallium/auxiliary/gallivm/lp_bld_pack.c              |  2 +-
>  src/gallium/auxiliary/gallivm/lp_bld_sample.c            |  6 +++---
>  src/gallium/auxiliary/util/u_math.h                      | 10 +---------
>  src/gallium/auxiliary/util/u_ringbuffer.c                |  2 +-
>  src/gallium/drivers/etnaviv/etnaviv_blt.c                |  2 +-
>  src/gallium/drivers/etnaviv/etnaviv_texture_state.c      |  3 ++-
>  src/gallium/drivers/freedreno/freedreno_query_hw.c       |  2 +-
>  src/gallium/drivers/i915/i915_state_sampler.c            |  3 ++-
>  src/gallium/drivers/llvmpipe/lp_state_fs.c               |  2 +-
>  src/gallium/drivers/llvmpipe/lp_texture.c                |  2 +-
>  src/gallium/drivers/nouveau/codegen/nv50_ir.cpp          |  2 +-
>  src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp |  4 ++--
>  src/gallium/drivers/nouveau/nv30/nv30_miptree.c          |  6 +++---
>  src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c   |  2 +-
>  src/gallium/drivers/r300/r300_texture_desc.c             |  6 +++---
>  src/gallium/drivers/r600/r600_texture.c                  |  2 +-
>  src/gallium/drivers/radeon/r600_texture.c                |  2 +-
>  src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c      |  2 +-
>  src/gallium/drivers/softpipe/sp_texture.c                | 12 ++++++------
>  src/gallium/drivers/swr/swr_screen.cpp                   |  4 ++--
>  src/gallium/drivers/vc4/vc4_program.c                    |  4 ++--
>  src/intel/compiler/brw_fs.cpp                            |  4 ++--
>  src/intel/vulkan/anv_allocator.c                         |  6 +++---
>  src/intel/vulkan/anv_formats.c                           |  4 ++--
>  src/intel/vulkan/anv_nir_lower_multiview.c               |  2 +-
>  src/mesa/state_tracker/st_cb_readpixels.c                |  4 ++--
>  src/mesa/state_tracker/st_cb_texture.c                   |  6 +++---
>  src/util/bitscan.h                                       | 12 ++++++++++++
>  src/util/u_vector.c                                      |  4 ++--
>  37 files changed, 79 insertions(+), 73 deletions(-)
>
> diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c
> index 73b5da0fe18..fe993824151 100644
> --- a/src/amd/common/ac_gpu_info.c
> +++ b/src/amd/common/ac_gpu_info.c
> @@ -331,8 +331,8 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev,
>       }
>       info->has_virtual_memory = true;
>  
> -     assert(util_is_power_of_two(dma.available_rings + 1));
> -     assert(util_is_power_of_two(compute.available_rings + 1));
> +     assert(util_is_power_of_two_or_zero(dma.available_rings + 1));
> +     assert(util_is_power_of_two_or_zero(compute.available_rings + 1));
>  
>       info->num_sdma_rings = util_bitcount(dma.available_rings);
>       info->num_compute_rings = util_bitcount(compute.available_rings);
> diff --git a/src/amd/common/ac_surface.c b/src/amd/common/ac_surface.c
> index 92bdf1dedec..cca273b8606 100644
> --- a/src/amd/common/ac_surface.c
> +++ b/src/amd/common/ac_surface.c
> @@ -271,7 +271,7 @@ static int gfx6_compute_level(ADDR_HANDLE addrlib,
>           AddrSurfInfoIn->bpp) {
>               unsigned alignment = 256 / (AddrSurfInfoIn->bpp / 8);
>  
> -             assert(util_is_power_of_two(AddrSurfInfoIn->bpp));
> +             assert(util_is_power_of_two_or_zero(AddrSurfInfoIn->bpp));
>               AddrSurfInfoIn->width = align(AddrSurfInfoIn->width, alignment);
>       }
>  
> diff --git a/src/amd/vulkan/radv_formats.c b/src/amd/vulkan/radv_formats.c
> index da341a3a848..56b1edaed07 100644
> --- a/src/amd/vulkan/radv_formats.c
> +++ b/src/amd/vulkan/radv_formats.c
> @@ -602,13 +602,13 @@ radv_physical_device_get_format_properties(struct 
> radv_physical_device *physical
>                               tiled |= 
> VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT;
>                       }
>               }
> -             if (tiled && 
> util_is_power_of_two(vk_format_get_blocksize(format)) && !scaled) {
> +             if (tiled && 
> util_is_power_of_two_or_zero(vk_format_get_blocksize(format)) && !scaled) {
>                       tiled |= VK_FORMAT_FEATURE_TRANSFER_SRC_BIT_KHR |
>                                VK_FORMAT_FEATURE_TRANSFER_DST_BIT_KHR;
>               }
>       }
>  
> -     if (linear && util_is_power_of_two(vk_format_get_blocksize(format)) && 
> !scaled) {
> +     if (linear && 
> util_is_power_of_two_or_zero(vk_format_get_blocksize(format)) && !scaled) {
>               linear |= VK_FORMAT_FEATURE_TRANSFER_SRC_BIT_KHR |
>                         VK_FORMAT_FEATURE_TRANSFER_DST_BIT_KHR;
>       }
> diff --git a/src/broadcom/compiler/nir_to_vir.c 
> b/src/broadcom/compiler/nir_to_vir.c
> index 893dfa160aa..188c99d74ee 100644
> --- a/src/broadcom/compiler/nir_to_vir.c
> +++ b/src/broadcom/compiler/nir_to_vir.c
> @@ -233,7 +233,7 @@ static struct qreg
>  ntq_get_alu_src(struct v3d_compile *c, nir_alu_instr *instr,
>                  unsigned src)
>  {
> -        assert(util_is_power_of_two(instr->dest.write_mask));
> +        assert(util_is_power_of_two_or_zero(instr->dest.write_mask));
>          unsigned chan = ffs(instr->dest.write_mask) - 1;
>          struct qreg r = ntq_get_src(c, instr->src[src].src,
>                                      instr->src[src].swizzle[chan]);
> @@ -862,7 +862,7 @@ ntq_emit_alu(struct v3d_compile *c, nir_alu_instr *instr)
>          /* We have a scalar result, so the instruction should only have a
>           * single channel written to.
>           */
> -        assert(util_is_power_of_two(instr->dest.write_mask));
> +        assert(util_is_power_of_two_or_zero(instr->dest.write_mask));
>          ntq_store_dest(c, &instr->dest.dest,
>                         ffs(instr->dest.write_mask) - 1, result);
>  }
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c 
> b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
> index 321c6e4edf0..e922474ef61 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
> @@ -1307,7 +1307,7 @@ lp_build_mul_imm(struct lp_build_context *bld,
>     if(b == 2 && bld->type.floating)
>        return lp_build_add(bld, a, a);
>  
> -   if(util_is_power_of_two(b)) {
> +   if(util_is_power_of_two_or_zero(b)) {
>        unsigned shift = ffs(b) - 1;
>  
>        if(bld->type.floating) {
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp 
> b/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp
> index f311fe7f690..23ada3d0433 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp
> @@ -60,7 +60,7 @@
>  extern "C" boolean
>  lp_check_alignment(const void *ptr, unsigned alignment)
>  {
> -   assert(util_is_power_of_two(alignment));
> +   assert(util_is_power_of_two_or_zero(alignment));
>     return ((uintptr_t)ptr & (alignment - 1)) == 0;
>  }
>  
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c 
> b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
> index 36dedba34f0..b52acca1b3e 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
> @@ -496,7 +496,7 @@ lp_build_fetch_rgba_aos(struct gallivm_state *gallivm,
>     if (format_matches_type(format_desc, type) &&
>         format_desc->block.bits <= type.width * 4 &&
>         /* XXX this shouldn't be needed */
> -       util_is_power_of_two(format_desc->block.bits)) {
> +       util_is_power_of_two_or_zero(format_desc->block.bits)) {
>        LLVMValueRef packed;
>        LLVMTypeRef dst_vec_type = lp_build_vec_type(gallivm, type);
>        struct lp_type fetch_type;
> @@ -609,7 +609,7 @@ lp_build_fetch_rgba_aos(struct gallivm_state *gallivm,
>         format_desc->block.width == 1 &&
>         format_desc->block.height == 1 &&
>         /* XXX this shouldn't be needed */
> -       util_is_power_of_two(format_desc->block.bits) &&
> +       util_is_power_of_two_or_zero(format_desc->block.bits) &&
>         format_desc->block.bits <= 32 &&
>         format_desc->is_bitmask &&
>         !format_desc->is_mixed &&
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_gather.c 
> b/src/gallium/auxiliary/gallivm/lp_bld_gather.c
> index 7d11dcd3b64..8cabe9ef01e 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_gather.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_gather.c
> @@ -118,7 +118,7 @@ lp_build_gather_elem(struct gallivm_state *gallivm,
>      */
>     if (!aligned) {
>        LLVMSetAlignment(res, 1);
> -   } else if (!util_is_power_of_two(src_width)) {
> +   } else if (!util_is_power_of_two_or_zero(src_width)) {
>        /*
>         * Full alignment is impossible, assume the caller really meant
>         * the individual elements were aligned (e.g. 3x32bit format).
> @@ -130,7 +130,7 @@ lp_build_gather_elem(struct gallivm_state *gallivm,
>         * this should cover all the 3-channel formats.
>         */
>        if (((src_width / 24) * 24 == src_width) &&
> -           util_is_power_of_two(src_width / 24)) {
> +           util_is_power_of_two_or_zero(src_width / 24)) {
>            LLVMSetAlignment(res, src_width / 24);
>        } else {
>           LLVMSetAlignment(res, 1);
> @@ -199,7 +199,7 @@ lp_build_gather_elem_vec(struct gallivm_state *gallivm,
>      */
>     if (!aligned) {
>        LLVMSetAlignment(res, 1);
> -   } else if (!util_is_power_of_two(src_width)) {
> +   } else if (!util_is_power_of_two_or_zero(src_width)) {
>        /*
>         * Full alignment is impossible, assume the caller really meant
>         * the individual elements were aligned (e.g. 3x32bit format).
> @@ -211,7 +211,7 @@ lp_build_gather_elem_vec(struct gallivm_state *gallivm,
>         * this should cover all the 3-channel formats.
>         */
>        if (((src_width / 24) * 24 == src_width) &&
> -           util_is_power_of_two(src_width / 24)) {
> +           util_is_power_of_two_or_zero(src_width / 24)) {
>            LLVMSetAlignment(res, src_width / 24);
>        } else {
>           LLVMSetAlignment(res, 1);
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_pack.c 
> b/src/gallium/auxiliary/gallivm/lp_bld_pack.c
> index 7879826422d..b8b53a7d6eb 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_pack.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.c
> @@ -219,7 +219,7 @@ lp_build_concat(struct gallivm_state *gallivm,
>     LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
>  
>     assert(src_type.length * num_vectors <= ARRAY_SIZE(shuffles));
> -   assert(util_is_power_of_two(num_vectors));
> +   assert(util_is_power_of_two_or_zero(num_vectors));
>  
>     new_length = src_type.length;
>  
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c 
> b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
> index ab9d051c911..81cb5060711 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
> @@ -114,9 +114,9 @@ lp_sampler_static_texture_state(struct 
> lp_static_texture_state *state,
>     state->swizzle_a         = view->swizzle_a;
>  
>     state->target            = view->target;
> -   state->pot_width         = util_is_power_of_two(texture->width0);
> -   state->pot_height        = util_is_power_of_two(texture->height0);
> -   state->pot_depth         = util_is_power_of_two(texture->depth0);
> +   state->pot_width         = util_is_power_of_two_or_zero(texture->width0);
> +   state->pot_height        = util_is_power_of_two_or_zero(texture->height0);
> +   state->pot_depth         = util_is_power_of_two_or_zero(texture->depth0);
>     state->level_zero_only   = !view->u.tex.last_level;
>  
>     /*
> diff --git a/src/gallium/auxiliary/util/u_math.h 
> b/src/gallium/auxiliary/util/u_math.h
> index a441b5457b2..46d02978fd6 100644
> --- a/src/gallium/auxiliary/util/u_math.h
> +++ b/src/gallium/auxiliary/util/u_math.h
> @@ -179,14 +179,6 @@ util_fast_pow(float x, float y)
>     return util_fast_exp2(util_fast_log2(x) * y);
>  }
>  
> -/* Note that this counts zero as a power of two.
> - */
> -static inline boolean
> -util_is_power_of_two( unsigned v )
> -{
> -   return (v & (v-1)) == 0;
> -}
> -
>  
>  /**
>   * Floor(x), returned as int.
> @@ -459,7 +451,7 @@ util_next_power_of_two(unsigned x)
>     if (x <= 1)
>        return 1;
>  
> -   if (util_is_power_of_two(x))
> +   if (util_is_power_of_two_or_zero(x))
>        return x;
>  
>     val--;
> diff --git a/src/gallium/auxiliary/util/u_ringbuffer.c 
> b/src/gallium/auxiliary/util/u_ringbuffer.c
> index 4d6166833e4..f6bb910671e 100644
> --- a/src/gallium/auxiliary/util/u_ringbuffer.c
> +++ b/src/gallium/auxiliary/util/u_ringbuffer.c
> @@ -27,7 +27,7 @@ struct util_ringbuffer *util_ringbuffer_create( unsigned 
> dwords )
>     if (!ring)
>        return NULL;
>  
> -   assert(util_is_power_of_two(dwords));
> +   assert(util_is_power_of_two_or_zero(dwords));
>     
>     ring->buf = MALLOC( dwords * sizeof(unsigned) );
>     if (ring->buf == NULL)
> diff --git a/src/gallium/drivers/etnaviv/etnaviv_blt.c 
> b/src/gallium/drivers/etnaviv/etnaviv_blt.c
> index 5d783a4ad9c..c30c11ab61e 100644
> --- a/src/gallium/drivers/etnaviv/etnaviv_blt.c
> +++ b/src/gallium/drivers/etnaviv/etnaviv_blt.c
> @@ -178,7 +178,7 @@ emit_blt_copyimage(struct etna_cmd_stream *stream, const 
> struct blt_imgcopy_op *
>  static void
>  emit_blt_inplace(struct etna_cmd_stream *stream, const struct blt_inplace_op 
> *op)
>  {
> -   assert(op->bpp > 0 && util_is_power_of_two(op->bpp));
> +   assert(op->bpp > 0 && util_is_power_of_two_or_zero(op->bpp));
>     etna_cmd_stream_reserve(stream, 64*2); /* Never allow BLT sequences to be 
> broken up */
>     etna_set_state(stream, VIVS_BLT_ENABLE, 0x00000001);
>     etna_set_state(stream, VIVS_BLT_CONFIG,
> diff --git a/src/gallium/drivers/etnaviv/etnaviv_texture_state.c 
> b/src/gallium/drivers/etnaviv/etnaviv_texture_state.c
> index faa073a71af..7100865f925 100644
> --- a/src/gallium/drivers/etnaviv/etnaviv_texture_state.c
> +++ b/src/gallium/drivers/etnaviv/etnaviv_texture_state.c
> @@ -158,7 +158,8 @@ etna_create_sampler_view_state(struct pipe_context *pctx, 
> struct pipe_resource *
>     /* Workaround for npot textures -- it appears that only CLAMP_TO_EDGE is
>      * supported when the appropriate capability is not set. */
>     if (!ctx->specs.npot_tex_any_wrap &&
> -       (!util_is_power_of_two(res->base.width0) || 
> !util_is_power_of_two(res->base.height0))) {
> +       (!util_is_power_of_two_or_zero(res->base.width0) ||
> +        !util_is_power_of_two_or_zero(res->base.height0))) {
>        sv->TE_SAMPLER_CONFIG0_MASK = ~(VIVS_TE_SAMPLER_CONFIG0_UWRAP__MASK |
>                                        VIVS_TE_SAMPLER_CONFIG0_VWRAP__MASK);
>        sv->TE_SAMPLER_CONFIG0 |=
> diff --git a/src/gallium/drivers/freedreno/freedreno_query_hw.c 
> b/src/gallium/drivers/freedreno/freedreno_query_hw.c
> index 8b25e9cbcca..86a46faa730 100644
> --- a/src/gallium/drivers/freedreno/freedreno_query_hw.c
> +++ b/src/gallium/drivers/freedreno/freedreno_query_hw.c
> @@ -301,7 +301,7 @@ fd_hw_sample_init(struct fd_batch *batch, uint32_t size)
>       struct fd_hw_sample *samp = slab_alloc_st(&batch->ctx->sample_pool);
>       pipe_reference_init(&samp->reference, 1);
>       samp->size = size;
> -     debug_assert(util_is_power_of_two(size));
> +     debug_assert(util_is_power_of_two_or_zero(size));
>       batch->next_sample_offset = align(batch->next_sample_offset, size);
>       samp->offset = batch->next_sample_offset;
>       /* NOTE: slab_alloc_st() does not zero out the buffer: */
> diff --git a/src/gallium/drivers/i915/i915_state_sampler.c 
> b/src/gallium/drivers/i915/i915_state_sampler.c
> index 84ed1514630..66a5778717a 100644
> --- a/src/gallium/drivers/i915/i915_state_sampler.c
> +++ b/src/gallium/drivers/i915/i915_state_sampler.c
> @@ -307,7 +307,8 @@ static void update_map(struct i915_context *i915,
>     int first_level = view->u.tex.first_level;
>     const uint num_levels = pt->last_level - first_level;
>     unsigned max_lod = num_levels * 4;
> -   bool is_npot = (!util_is_power_of_two(pt->width0) || 
> !util_is_power_of_two(pt->height0)); 
> +   bool is_npot = (!util_is_power_of_two_or_zero(pt->width0) ||
> +                   !util_is_power_of_two_or_zero(pt->height0));
>     uint format, pitch;
>  
>     /*
> diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c 
> b/src/gallium/drivers/llvmpipe/lp_state_fs.c
> index 66645b07ac7..74b8d4dd96e 100644
> --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
> +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
> @@ -2240,7 +2240,7 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
>  
>     if (dst_count > src_count) {
>        if ((dst_type.width == 8 || dst_type.width == 16) &&
> -          util_is_power_of_two(dst_type.length) &&
> +          util_is_power_of_two_or_zero(dst_type.length) &&
>            dst_type.length * dst_type.width < 128) {
>           /*
>            * Never try to load values as 4xi8 which we will then
> diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c 
> b/src/gallium/drivers/llvmpipe/lp_texture.c
> index 162c74ad7dd..89852cc95c3 100644
> --- a/src/gallium/drivers/llvmpipe/lp_texture.c
> +++ b/src/gallium/drivers/llvmpipe/lp_texture.c
> @@ -670,7 +670,7 @@ llvmpipe_get_format_alignment( enum pipe_format format )
>  
>     bytes = size / 8;
>  
> -   if (!util_is_power_of_two(bytes)) {
> +   if (!util_is_power_of_two_or_zero(bytes)) {
>        bytes /= desc->nr_channels;
>     }
>  
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp 
> b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
> index 6f12df70a11..c987da99085 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
> @@ -423,7 +423,7 @@ ImmediateValue::isNegative() const
>  bool
>  ImmediateValue::isPow2() const
>  {
> -   return util_is_power_of_two(reg.data.u32);
> +   return util_is_power_of_two_or_zero(reg.data.u32);
>  }
>  
>  void
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp 
> b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> index 48cf74950df..39177bd044b 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> @@ -1305,7 +1305,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue 
> &imm0, int s)
>                   src->op == OP_SHR &&
>                   src->src(1).getImmediate(imm1) &&
>                   i->src(t).mod == Modifier(0) &&
> -                 util_is_power_of_two(imm0.reg.data.u32 + 1)) {
> +                 util_is_power_of_two_or_zero(imm0.reg.data.u32 + 1)) {
>           // low byte = offset, high byte = width
>           uint32_t ext = (util_last_bit(imm0.reg.data.u32) << 8) | 
> imm1.reg.data.u32;
>           i->op = OP_EXTBF;
> @@ -1314,7 +1314,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue 
> &imm0, int s)
>        } else if (src->op == OP_SHL &&
>                   src->src(1).getImmediate(imm1) &&
>                   i->src(t).mod == Modifier(0) &&
> -                 util_is_power_of_two(~imm0.reg.data.u32 + 1) &&
> +                 util_is_power_of_two_or_zero(~imm0.reg.data.u32 + 1) &&
>                   util_last_bit(~imm0.reg.data.u32) <= imm1.reg.data.u32) {
>           i->op = OP_MOV;
>           i->setSrc(s, NULL);
> diff --git a/src/gallium/drivers/nouveau/nv30/nv30_miptree.c 
> b/src/gallium/drivers/nouveau/nv30/nv30_miptree.c
> index 165b8f29b4b..4f991776323 100644
> --- a/src/gallium/drivers/nouveau/nv30/nv30_miptree.c
> +++ b/src/gallium/drivers/nouveau/nv30/nv30_miptree.c
> @@ -401,9 +401,9 @@ nv30_miptree_create(struct pipe_screen *pscreen,
>  
>     if ((pt->target == PIPE_TEXTURE_RECT) ||
>         (pt->bind & PIPE_BIND_SCANOUT) ||
> -       !util_is_power_of_two(pt->width0) ||
> -       !util_is_power_of_two(pt->height0) ||
> -       !util_is_power_of_two(pt->depth0) ||
> +       !util_is_power_of_two_or_zero(pt->width0) ||
> +       !util_is_power_of_two_or_zero(pt->height0) ||
> +       !util_is_power_of_two_or_zero(pt->depth0) ||
>         util_format_is_compressed(pt->format) ||
>         util_format_is_float(pt->format) || mt->ms_mode) {
>        mt->uniform_pitch = util_format_get_nblocksx(pt->format, w) * blocksz;
> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c 
> b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
> index 37a67619588..8e2192d3de2 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
> @@ -183,7 +183,7 @@ nvc0_validate_fb(struct nvc0_context *nvc0)
>     }
>  
>     if (nr_cbufs == 0 && !fb->zsbuf) {
> -      assert(util_is_power_of_two(fb->samples));
> +      assert(util_is_power_of_two_or_zero(fb->samples));
>        assert(fb->samples <= 8);
>  
>        nvc0_fb_set_null_rt(push, 0, fb->layers);
> diff --git a/src/gallium/drivers/r300/r300_texture_desc.c 
> b/src/gallium/drivers/r300/r300_texture_desc.c
> index 2442d726cd1..37e7b5fc4e2 100644
> --- a/src/gallium/drivers/r300/r300_texture_desc.c
> +++ b/src/gallium/drivers/r300/r300_texture_desc.c
> @@ -270,15 +270,15 @@ static void r300_setup_miptree(struct r300_screen 
> *screen,
>  static void r300_setup_flags(struct r300_resource *tex)
>  {
>      tex->tex.uses_stride_addressing =
> -        !util_is_power_of_two(tex->b.b.width0) ||
> +        !util_is_power_of_two_or_zero(tex->b.b.width0) ||
>          (tex->tex.stride_in_bytes_override &&
>           r300_stride_to_width(tex->b.b.format,
>                           tex->tex.stride_in_bytes_override) != 
> tex->b.b.width0);
>  
>      tex->tex.is_npot =
>          tex->tex.uses_stride_addressing ||
> -        !util_is_power_of_two(tex->b.b.height0) ||
> -        !util_is_power_of_two(tex->b.b.depth0);
> +        !util_is_power_of_two_or_zero(tex->b.b.height0) ||
> +        !util_is_power_of_two_or_zero(tex->b.b.depth0);
>  }
>  
>  static void r300_setup_cbzb_flags(struct r300_screen *rscreen,
> diff --git a/src/gallium/drivers/r600/r600_texture.c 
> b/src/gallium/drivers/r600/r600_texture.c
> index 806bc278b08..c39c00c2e3e 100644
> --- a/src/gallium/drivers/r600/r600_texture.c
> +++ b/src/gallium/drivers/r600/r600_texture.c
> @@ -217,7 +217,7 @@ static int r600_init_surface(struct r600_common_screen 
> *rscreen,
>               bpe = 4; /* stencil is allocated separately on evergreen */
>       } else {
>               bpe = util_format_get_blocksize(ptex->format);
> -             assert(util_is_power_of_two(bpe));
> +             assert(util_is_power_of_two_or_zero(bpe));
>       }
>  
>       if (!is_flushed_depth && is_depth) {
> diff --git a/src/gallium/drivers/radeon/r600_texture.c 
> b/src/gallium/drivers/radeon/r600_texture.c
> index 3a0a79187b8..8449413f286 100644
> --- a/src/gallium/drivers/radeon/r600_texture.c
> +++ b/src/gallium/drivers/radeon/r600_texture.c
> @@ -240,7 +240,7 @@ static int r600_init_surface(struct si_screen *sscreen,
>               bpe = 4; /* stencil is allocated separately on evergreen */
>       } else {
>               bpe = util_format_get_blocksize(ptex->format);
> -             assert(util_is_power_of_two(bpe));
> +             assert(util_is_power_of_two_or_zero(bpe));
>       }
>  
>       if (!is_flushed_depth && is_depth) {
> diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c 
> b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
> index 90cc2e0d981..79fdebe8388 100644
> --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
> +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
> @@ -192,7 +192,7 @@ LLVMValueRef si_llvm_bound_index(struct si_shader_context 
> *ctx,
>       LLVMValueRef c_max = LLVMConstInt(ctx->i32, num - 1, 0);
>       LLVMValueRef cc;
>  
> -     if (util_is_power_of_two(num)) {
> +     if (util_is_power_of_two_or_zero(num)) {
>               index = LLVMBuildAnd(builder, index, c_max, "");
>       } else {
>               /* In theory, this MAX pattern should result in code that is
> diff --git a/src/gallium/drivers/softpipe/sp_texture.c 
> b/src/gallium/drivers/softpipe/sp_texture.c
> index ea5e2c64b84..c49bfcaba55 100644
> --- a/src/gallium/drivers/softpipe/sp_texture.c
> +++ b/src/gallium/drivers/softpipe/sp_texture.c
> @@ -166,9 +166,9 @@ softpipe_resource_create_front(struct pipe_screen *screen,
>     pipe_reference_init(&spr->base.reference, 1);
>     spr->base.screen = screen;
>  
> -   spr->pot = (util_is_power_of_two(templat->width0) &&
> -               util_is_power_of_two(templat->height0) &&
> -               util_is_power_of_two(templat->depth0));
> +   spr->pot = (util_is_power_of_two_or_zero(templat->width0) &&
> +               util_is_power_of_two_or_zero(templat->height0) &&
> +               util_is_power_of_two_or_zero(templat->depth0));
>  
>     if (spr->base.bind & (PIPE_BIND_DISPLAY_TARGET |
>                        PIPE_BIND_SCANOUT |
> @@ -231,9 +231,9 @@ softpipe_resource_from_handle(struct pipe_screen *screen,
>     pipe_reference_init(&spr->base.reference, 1);
>     spr->base.screen = screen;
>  
> -   spr->pot = (util_is_power_of_two(templat->width0) &&
> -               util_is_power_of_two(templat->height0) &&
> -               util_is_power_of_two(templat->depth0));
> +   spr->pot = (util_is_power_of_two_or_zero(templat->width0) &&
> +               util_is_power_of_two_or_zero(templat->height0) &&
> +               util_is_power_of_two_or_zero(templat->depth0));
>  
>     spr->dt = winsys->displaytarget_from_handle(winsys,
>                                                 templat,
> diff --git a/src/gallium/drivers/swr/swr_screen.cpp 
> b/src/gallium/drivers/swr/swr_screen.cpp
> index dd2d003d15d..880a177c399 100644
> --- a/src/gallium/drivers/swr/swr_screen.cpp
> +++ b/src/gallium/drivers/swr/swr_screen.cpp
> @@ -105,7 +105,7 @@ swr_is_format_supported(struct pipe_screen *_screen,
>        return FALSE;
>  
>     if ((sample_count > screen->msaa_max_count)
> -      || !util_is_power_of_two(sample_count))
> +      || !util_is_power_of_two_or_zero(sample_count))
>        return FALSE;
>  
>     if (bind & PIPE_BIND_DISPLAY_TARGET) {
> @@ -1103,7 +1103,7 @@ swr_validate_env_options(struct swr_screen *screen)
>     int msaa_max_count = debug_get_num_option("SWR_MSAA_MAX_COUNT", 1);
>     if (msaa_max_count != 1) {
>        if ((msaa_max_count < 1) || (msaa_max_count > SWR_MAX_NUM_MULTISAMPLES)
> -            || !util_is_power_of_two(msaa_max_count)) {
> +            || !util_is_power_of_two_or_zero(msaa_max_count)) {
>           fprintf(stderr, "SWR_MSAA_MAX_COUNT invalid: %d\n", msaa_max_count);
>           fprintf(stderr, "must be power of 2 between 1 and %d" \
>                           " (or 1 to disable msaa)\n",
> diff --git a/src/gallium/drivers/vc4/vc4_program.c 
> b/src/gallium/drivers/vc4/vc4_program.c
> index 2ec6aa471d4..367613130bb 100644
> --- a/src/gallium/drivers/vc4/vc4_program.c
> +++ b/src/gallium/drivers/vc4/vc4_program.c
> @@ -313,7 +313,7 @@ static struct qreg
>  ntq_get_alu_src(struct vc4_compile *c, nir_alu_instr *instr,
>                  unsigned src)
>  {
> -        assert(util_is_power_of_two(instr->dest.write_mask));
> +        assert(util_is_power_of_two_or_zero(instr->dest.write_mask));
>          unsigned chan = ffs(instr->dest.write_mask) - 1;
>          struct qreg r = ntq_get_src(c, instr->src[src].src,
>                                      instr->src[src].swizzle[chan]);
> @@ -1363,7 +1363,7 @@ ntq_emit_alu(struct vc4_compile *c, nir_alu_instr 
> *instr)
>          /* We have a scalar result, so the instruction should only have a
>           * single channel written to.
>           */
> -        assert(util_is_power_of_two(instr->dest.write_mask));
> +        assert(util_is_power_of_two_or_zero(instr->dest.write_mask));
>          ntq_store_dest(c, &instr->dest.dest,
>                         ffs(instr->dest.write_mask) - 1, result);
>  }
> diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
> index 3d454c3db14..fdcc909a3d5 100644
> --- a/src/intel/compiler/brw_fs.cpp
> +++ b/src/intel/compiler/brw_fs.cpp
> @@ -1976,7 +1976,7 @@ struct cplx_align {
>  static void
>  cplx_align_assert_sane(struct cplx_align a)
>  {
> -   assert(a.mul > 0 && util_is_power_of_two(a.mul));
> +   assert(a.mul > 0 && util_is_power_of_two_or_zero(a.mul));
>     assert(a.offset < a.mul);
>  }
>  
> @@ -2028,7 +2028,7 @@ static void
>  mark_uniform_slots_read(struct uniform_slot_info *slots,
>                          unsigned num_slots, unsigned alignment)
>  {
> -   assert(alignment > 0 && util_is_power_of_two(alignment));
> +   assert(alignment > 0 && util_is_power_of_two_or_zero(alignment));
>     assert(alignment <= CPLX_ALIGN_MAX_MUL);
>  
>     /* We can't align a slot to anything less than the slot size */
> diff --git a/src/intel/vulkan/anv_allocator.c 
> b/src/intel/vulkan/anv_allocator.c
> index a27af4eccc4..f884ac3b827 100644
> --- a/src/intel/vulkan/anv_allocator.c
> +++ b/src/intel/vulkan/anv_allocator.c
> @@ -619,7 +619,7 @@ anv_state_pool_init(struct anv_state_pool *pool,
>     if (result != VK_SUCCESS)
>        return result;
>  
> -   assert(util_is_power_of_two(block_size));
> +   assert(util_is_power_of_two_or_zero(block_size));
>     pool->block_size = block_size;
>     pool->back_alloc_free_list = ANV_FREE_LIST_EMPTY;
>     for (unsigned i = 0; i < ANV_STATE_BUCKETS; i++) {
> @@ -814,7 +814,7 @@ done:
>  static void
>  anv_state_pool_free_no_vg(struct anv_state_pool *pool, struct anv_state 
> state)
>  {
> -   assert(util_is_power_of_two(state.alloc_size));
> +   assert(util_is_power_of_two_or_zero(state.alloc_size));
>     unsigned bucket = anv_state_pool_get_bucket(state.alloc_size);
>  
>     if (state.offset < 0) {
> @@ -1041,7 +1041,7 @@ anv_bo_pool_free(struct anv_bo_pool *pool, const struct 
> anv_bo *bo_in)
>     struct bo_pool_bo_link *link = bo.map;
>     VG_NOACCESS_WRITE(&link->bo, bo);
>  
> -   assert(util_is_power_of_two(bo.size));
> +   assert(util_is_power_of_two_or_zero(bo.size));
>     const unsigned size_log2 = ilog2_round_up(bo.size);
>     const unsigned bucket = size_log2 - 12;
>     assert(bucket < ARRAY_SIZE(pool->free_list));
> diff --git a/src/intel/vulkan/anv_formats.c b/src/intel/vulkan/anv_formats.c
> index 8d1ea402f0f..085706402c1 100644
> --- a/src/intel/vulkan/anv_formats.c
> +++ b/src/intel/vulkan/anv_formats.c
> @@ -437,7 +437,7 @@ anv_get_format_plane(const struct gen_device_info 
> *devinfo, VkFormat vk_format,
>        isl_format_get_layout(plane_format.isl_format);
>  
>     if (tiling == VK_IMAGE_TILING_OPTIMAL &&
> -       !util_is_power_of_two(isl_layout->bpb)) {
> +       !util_is_power_of_two_or_zero(isl_layout->bpb)) {
>        /* Tiled formats *must* be power-of-two because we need up upload
>         * them with the render pipeline.  For 3-channel formats, we fix
>         * this by switching them over to RGBX or RGBA formats under the
> @@ -563,7 +563,7 @@ get_image_format_features(const struct gen_device_info *devinfo,
>      */
>     if (vk_tiling == VK_IMAGE_TILING_OPTIMAL &&
>         base_isl_format != ISL_FORMAT_UNSUPPORTED &&
> -       !util_is_power_of_two(isl_format_layouts[base_isl_format].bpb) &&
> +       !util_is_power_of_two_or_zero(isl_format_layouts[base_isl_format].bpb) &&
>         isl_format_rgb_to_rgbx(base_isl_format) == ISL_FORMAT_UNSUPPORTED) {
>        flags &= ~VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT;
>        flags &= ~VK_FORMAT_FEATURE_BLIT_DST_BIT;
> diff --git a/src/intel/vulkan/anv_nir_lower_multiview.c b/src/intel/vulkan/anv_nir_lower_multiview.c
> index 365a70d7579..6822595728c 100644
> --- a/src/intel/vulkan/anv_nir_lower_multiview.c
> +++ b/src/intel/vulkan/anv_nir_lower_multiview.c
> @@ -87,7 +87,7 @@ build_view_index(struct lower_multiview_state *state)
>              nir_umod(b, nir_load_instance_id(b),
>                          nir_imm_int(b, _mesa_bitcount(state->view_mask)));
>  
> -         if (util_is_power_of_two(state->view_mask + 1)) {
> +         if (util_is_power_of_two_or_zero(state->view_mask + 1)) {
>              /* If we have a full view mask, then compacted is what we want */
>              state->view_index = compacted;
>           } else {
> diff --git a/src/mesa/state_tracker/st_cb_readpixels.c b/src/mesa/state_tracker/st_cb_readpixels.c
> index 84dd2d548e3..29181216071 100644
> --- a/src/mesa/state_tracker/st_cb_readpixels.c
> +++ b/src/mesa/state_tracker/st_cb_readpixels.c
> @@ -270,8 +270,8 @@ blit_to_staging(struct st_context *st, struct st_renderbuffer *strb,
>     /* We are creating a texture of the size of the region being read back.
>      * Need to check for NPOT texture support. */
>     if (!screen->get_param(screen, PIPE_CAP_NPOT_TEXTURES) &&
> -       (!util_is_power_of_two(width) ||
> -        !util_is_power_of_two(height)))
> +       (!util_is_power_of_two_or_zero(width) ||
> +        !util_is_power_of_two_or_zero(height)))
>        return NULL;
>  
>     /* create the destination texture */
> diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c
> index 6345ead6396..120b7c681d5 100644
> --- a/src/mesa/state_tracker/st_cb_texture.c
> +++ b/src/mesa/state_tracker/st_cb_texture.c
> @@ -1527,9 +1527,9 @@ st_TexSubImage(struct gl_context *ctx, GLuint dims,
>  
>     /* Check for NPOT texture support. */
>     if (!screen->get_param(screen, PIPE_CAP_NPOT_TEXTURES) &&
> -       (!util_is_power_of_two(src_templ.width0) ||
> -        !util_is_power_of_two(src_templ.height0) ||
> -        !util_is_power_of_two(src_templ.depth0))) {
> +       (!util_is_power_of_two_or_zero(src_templ.width0) ||
> +        !util_is_power_of_two_or_zero(src_templ.height0) ||
> +        !util_is_power_of_two_or_zero(src_templ.depth0))) {
>        goto fallback;
>     }
>  
> diff --git a/src/util/bitscan.h b/src/util/bitscan.h
> index 611e8120596..2d4e46ec0f1 100644
> --- a/src/util/bitscan.h
> +++ b/src/util/bitscan.h
> @@ -31,6 +31,7 @@
>  
>  #include <assert.h>
>  #include <stdint.h>
> +#include <stdbool.h>
>  #include <string.h>
>  
>  #if defined(_MSC_VER)
> @@ -107,6 +108,17 @@ u_bit_scan64(uint64_t *mask)
>     return i;
>  }
>  
> +/* Determine if an unsigned value is a power of two.
> + *
> + * \note
> + * Zero is treated as a power of two.
> + */
> +static inline bool
> +util_is_power_of_two_or_zero(unsigned v)
> +{
> +   return (v & (v - 1)) == 0;
> +}
> +
>  /* For looping over a bitmask when you want to loop over consecutive bits
>   * manually, for example:
>   *
> diff --git a/src/util/u_vector.c b/src/util/u_vector.c
> index 0de492ccf9a..bec6e5bbc30 100644
> --- a/src/util/u_vector.c
> +++ b/src/util/u_vector.c
> @@ -37,8 +37,8 @@
>  int
>  u_vector_init(struct u_vector *vector, uint32_t element_size, uint32_t size)
>  {
> -   assert(util_is_power_of_two(size));
> -   assert(element_size < size && util_is_power_of_two(element_size));
> +   assert(util_is_power_of_two_or_zero(size));
> +   assert(element_size < size && util_is_power_of_two_or_zero(element_size));
>  
>     vector->head = 0;
>     vector->tail = 0;

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/8] util: Move util_is_power_of_two to bitscan.h and rename to util_is_power_of_two_or_zero

Reply via email to