LGTM.

Jose
----- Original Message -----
> From: Roland Scheidegger <srol...@vmware.com>
>
> This is just preparation for per-pixel (or per-quad in case of multiple
> quads)
> min/mag filter since some assumptions about number of miplevels being equal
> to number of lods no longer holds true.
> This change does not change behavior yet (though theoretically when forcing
> per-element path it might be slower with different min/mag filter since the
> code will respect this setting even when there's no mip maps now in this
> case,
> so some lod calcs will be done per-element just ultimately still the same
> filter used for all pixels).
> ---
> src/gallium/auxiliary/gallivm/lp_bld_sample.c | 126 +++++++++---------
> src/gallium/auxiliary/gallivm/lp_bld_sample.h | 13 +-
> src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c | 20 +--
> src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c | 141
> ++++++++++++---------
> 4 files changed, 169 insertions(+), 131 deletions(-)
>
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c
> b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
> index 89d7249..e1cfd78 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
> @@ -217,7 +217,7 @@ lp_build_rho(struct lp_build_sample_context *bld,
> struct lp_build_context *float_size_bld = &bld->float_size_in_bld;
> struct lp_build_context *float_bld = &bld->float_bld;
> struct lp_build_context *coord_bld = &bld->coord_bld;
> - struct lp_build_context *levelf_bld = &bld->levelf_bld;
> + struct lp_build_context *rho_bld = &bld->lodf_bld;
> const unsigned dims = bld->dims;
> LLVMValueRef ddx_ddy[2];
> LLVMBuilderRef builder = bld->gallivm->builder;
> @@ -231,7 +231,7 @@ lp_build_rho(struct lp_build_sample_context *bld,
> LLVMValueRef first_level, first_level_vec;
> unsigned length = coord_bld->type.length;
> unsigned num_quads = length / 4;
> - boolean rho_per_quad = levelf_bld->type.length != length;
> + boolean rho_per_quad = rho_bld->type.length != length;
> unsigned i;
> LLVMValueRef i32undef =
> LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
> LLVMValueRef rho_xvec, rho_yvec;
> @@ -259,18 +259,18 @@ lp_build_rho(struct lp_build_sample_context *bld,
> */
> if (rho_per_quad) {
> rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
> - levelf_bld->type, cube_rho, 0);
> + rho_bld->type, cube_rho, 0);
> }
> else {
> rho = lp_build_swizzle_scalar_aos(coord_bld, cube_rho, 0, 4);
> }
> if (gallivm_debug & GALLIVM_DEBUG_NO_RHO_APPROX) {
> - rho = lp_build_sqrt(levelf_bld, rho);
> + rho = lp_build_sqrt(rho_bld, rho);
> }
> /* Could optimize this for single quad just skip the broadcast */
> cubesize = lp_build_extract_broadcast(gallivm,
> bld->float_size_in_type,
> - levelf_bld->type, float_size,
> index0);
> + rho_bld->type, float_size,
> index0);
> - rho = lp_build_mul(levelf_bld, cubesize, rho);
> + rho = lp_build_mul(rho_bld, cubesize, rho);
> }
> else if (derivs && !(bld->static_texture_state->target ==
> PIPE_TEXTURE_CUBE)) {
> LLVMValueRef ddmax[3], ddx[3], ddy[3];
> @@ -311,9 +311,9 @@ lp_build_rho(struct lp_build_sample_context *bld,
> * otherwise would also need different code to per-pixel lod
> case.
> */
> rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
> - levelf_bld->type, rho, 0);
> + rho_bld->type, rho, 0);
> }
> - rho = lp_build_sqrt(levelf_bld, rho);
> + rho = lp_build_sqrt(rho_bld, rho);
>
> }
> else {
> @@ -329,7 +329,7 @@ lp_build_rho(struct lp_build_sample_context *bld,
> * rho_vec contains per-pixel rho, convert to scalar per quad.
> */
> rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
> - levelf_bld->type, rho, 0);
> + rho_bld->type, rho, 0);
> }
> }
> }
> @@ -404,7 +404,7 @@ lp_build_rho(struct lp_build_sample_context *bld,
>
> if (rho_per_quad) {
> rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
> - levelf_bld->type, rho, 0);
> + rho_bld->type, rho, 0);
> }
> else {
> /*
> @@ -416,7 +416,7 @@ lp_build_rho(struct lp_build_sample_context *bld,
> */
> rho = lp_build_swizzle_scalar_aos(coord_bld, rho, 0, 4);
> }
> - rho = lp_build_sqrt(levelf_bld, rho);
> + rho = lp_build_sqrt(rho_bld, rho);
> }
> else {
> ddx_ddy[0] = lp_build_abs(coord_bld, ddx_ddy[0]);
> @@ -497,7 +497,7 @@ lp_build_rho(struct lp_build_sample_context *bld,
> }
> if (rho_per_quad) {
> rho = lp_build_pack_aos_scalars(bld->gallivm,
> coord_bld->type,
> - levelf_bld->type, rho, 0);
> + rho_bld->type, rho, 0);
> }
> else {
> rho = lp_build_swizzle_scalar_aos(coord_bld, rho, 0, 4);
> }
> @@ -528,7 +528,7 @@ lp_build_rho(struct lp_build_sample_context *bld,
> }
> }
> if (!rho_per_quad) {
> - rho = lp_build_broadcast_scalar(levelf_bld, rho);
> + rho = lp_build_broadcast_scalar(rho_bld, rho);
> }
> }
> }
> @@ -675,8 +675,7 @@ lp_build_brilinear_rho(struct lp_build_context *bld,
> * \param out_lod_fpart float part of lod (never larger than 1 but may be
> negative)
> * \param out_lod_positive (mask) if lod is positive (i.e. texture is
> minified)
> *
> - * The resulting lod is scalar per quad, so only the first value per quad
> - * passed in from lod_bias, explicit_lod is used.
> + * The resulting lod can be scalar per quad or be per element.
> */
> void
> lp_build_lod_selector(struct lp_build_sample_context *bld,
> @@ -696,12 +695,12 @@ lp_build_lod_selector(struct lp_build_sample_context
> *bld,
>
> {
> LLVMBuilderRef builder = bld->gallivm->builder;
> - struct lp_build_context *levelf_bld = &bld->levelf_bld;
> + struct lp_build_context *lodf_bld = &bld->lodf_bld;
> LLVMValueRef lod;
>
> - *out_lod_ipart = bld->leveli_bld.zero;
> - *out_lod_positive = bld->leveli_bld.zero;
> - *out_lod_fpart = levelf_bld->zero;
> + *out_lod_ipart = bld->lodi_bld.zero;
> + *out_lod_positive = bld->lodi_bld.zero;
> + *out_lod_fpart = lodf_bld->zero;
>
> /*
> * For determining min/mag, we follow GL 4.1 spec, 3.9.12 Texture
> Magnification:
> @@ -729,13 +728,13 @@ lp_build_lod_selector(struct lp_build_sample_context
> *bld,
> bld->dynamic_state->min_lod(bld->dynamic_state,
> bld->gallivm, sampler_unit);
>
> - lod = lp_build_broadcast_scalar(levelf_bld, min_lod);
> + lod = lp_build_broadcast_scalar(lodf_bld, min_lod);
> }
> else {
> if (explicit_lod) {
> if (bld->num_lods != bld->coord_type.length)
> lod = lp_build_pack_aos_scalars(bld->gallivm,
> bld->coord_bld.type,
> - levelf_bld->type, explicit_lod,
> 0);
> + lodf_bld->type, explicit_lod,
> 0);
> else
> lod = explicit_lod;
> }
> @@ -764,33 +763,33 @@ lp_build_lod_selector(struct lp_build_sample_context
> *bld,
> * Don't actually need both all the time, ipart is needed
> * for nearest mipfilter, pos_or_zero if min != mag.
> */
> - *out_lod_ipart = lp_build_ilog2(levelf_bld, rho);
> - *out_lod_positive = lp_build_cmp(levelf_bld,
> PIPE_FUNC_GREATER,
> - rho, levelf_bld->one);
> + *out_lod_ipart = lp_build_ilog2(lodf_bld, rho);
> + *out_lod_positive = lp_build_cmp(lodf_bld, PIPE_FUNC_GREATER,
> + rho, lodf_bld->one);
> return;
> }
> if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR &&
> !(gallivm_debug & GALLIVM_DEBUG_NO_BRILINEAR)) {
> - lp_build_brilinear_rho(levelf_bld, rho, BRILINEAR_FACTOR,
> + lp_build_brilinear_rho(lodf_bld, rho, BRILINEAR_FACTOR,
> out_lod_ipart, out_lod_fpart);
> - *out_lod_positive = lp_build_cmp(levelf_bld,
> PIPE_FUNC_GREATER,
> - rho, levelf_bld->one);
> + *out_lod_positive = lp_build_cmp(lodf_bld, PIPE_FUNC_GREATER,
> + rho, lodf_bld->one);
> return;
> }
> }
>
> if (0) {
> - lod = lp_build_log2(levelf_bld, rho);
> + lod = lp_build_log2(lodf_bld, rho);
> }
> else {
> - lod = lp_build_fast_log2(levelf_bld, rho);
> + lod = lp_build_fast_log2(lodf_bld, rho);
> }
>
> /* add shader lod bias */
> if (lod_bias) {
> if (bld->num_lods != bld->coord_type.length)
> lod_bias = lp_build_pack_aos_scalars(bld->gallivm,
> bld->coord_bld.type,
> - levelf_bld->type,
> lod_bias, 0);
> + lodf_bld->type,
> lod_bias, 0);
> lod = LLVMBuildFAdd(builder, lod, lod_bias, "shader_lod_bias");
> }
> }
> @@ -800,7 +799,7 @@ lp_build_lod_selector(struct lp_build_sample_context
> *bld,
> LLVMValueRef sampler_lod_bias =
> bld->dynamic_state->lod_bias(bld->dynamic_state,
> bld->gallivm, sampler_unit);
> - sampler_lod_bias = lp_build_broadcast_scalar(levelf_bld,
> + sampler_lod_bias = lp_build_broadcast_scalar(lodf_bld,
> sampler_lod_bias);
> lod = LLVMBuildFAdd(builder, lod, sampler_lod_bias,
> "sampler_lod_bias");
> }
> @@ -810,36 +809,36 @@ lp_build_lod_selector(struct lp_build_sample_context
> *bld,
> LLVMValueRef max_lod =
> bld->dynamic_state->max_lod(bld->dynamic_state,
> bld->gallivm, sampler_unit);
> - max_lod = lp_build_broadcast_scalar(levelf_bld, max_lod);
> + max_lod = lp_build_broadcast_scalar(lodf_bld, max_lod);
>
> - lod = lp_build_min(levelf_bld, lod, max_lod);
> + lod = lp_build_min(lodf_bld, lod, max_lod);
> }
> if (bld->static_sampler_state->apply_min_lod) {
> LLVMValueRef min_lod =
> bld->dynamic_state->min_lod(bld->dynamic_state,
> bld->gallivm, sampler_unit);
> - min_lod = lp_build_broadcast_scalar(levelf_bld, min_lod);
> + min_lod = lp_build_broadcast_scalar(lodf_bld, min_lod);
>
> - lod = lp_build_max(levelf_bld, lod, min_lod);
> + lod = lp_build_max(lodf_bld, lod, min_lod);
> }
> }
>
> - *out_lod_positive = lp_build_cmp(levelf_bld, PIPE_FUNC_GREATER,
> - lod, levelf_bld->zero);
> + *out_lod_positive = lp_build_cmp(lodf_bld, PIPE_FUNC_GREATER,
> + lod, lodf_bld->zero);
>
> if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
> if (!(gallivm_debug & GALLIVM_DEBUG_NO_BRILINEAR)) {
> - lp_build_brilinear_lod(levelf_bld, lod, BRILINEAR_FACTOR,
> + lp_build_brilinear_lod(lodf_bld, lod, BRILINEAR_FACTOR,
> out_lod_ipart, out_lod_fpart);
> }
> else {
> - lp_build_ifloor_fract(levelf_bld, lod, out_lod_ipart,
> out_lod_fpart);
> + lp_build_ifloor_fract(lodf_bld, lod, out_lod_ipart, out_lod_fpart);
> }
>
> lp_build_name(*out_lod_fpart, "lod_fpart");
> }
> else {
> - *out_lod_ipart = lp_build_iround(levelf_bld, lod);
> + *out_lod_ipart = lp_build_iround(lodf_bld, lod);
> }
>
> lp_build_name(*out_lod_ipart, "lod_ipart");
> @@ -880,14 +879,14 @@ lp_build_nearest_mip_level(struct
> lp_build_sample_context *bld,
> out = lp_build_cmp(leveli_bld, PIPE_FUNC_LESS, level, first_level);
> out1 = lp_build_cmp(leveli_bld, PIPE_FUNC_GREATER, level, last_level);
> out = lp_build_or(leveli_bld, out, out1);
> - if (bld->num_lods == bld->coord_bld.type.length) {
> + if (bld->num_mips == bld->coord_bld.type.length) {
> *out_of_bounds = out;
> }
> - else if (bld->num_lods == 1) {
> + else if (bld->num_mips == 1) {
> *out_of_bounds = lp_build_broadcast_scalar(&bld->int_coord_bld,
> out);
> }
> else {
> - assert(bld->num_lods == bld->coord_bld.type.length / 4);
> + assert(bld->num_mips == bld->coord_bld.type.length / 4);
> *out_of_bounds =
> lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
>
> leveli_bld->type,
>
> bld->int_coord_bld.type,
> @@ -904,8 +903,9 @@ lp_build_nearest_mip_level(struct lp_build_sample_context
> *bld,
>
>
> /**
> - * For PIPE_TEX_MIPFILTER_LINEAR, convert per-quad int LOD(s) to two
> (per-quad)
> - * (adjacent) mipmap level indexes, and fix up float lod part accordingly.
> + * For PIPE_TEX_MIPFILTER_LINEAR, convert per-quad (or per element) int
> LOD(s)
> + * to two (per-quad) (adjacent) mipmap level indexes, and fix up float lod
> + * part accordingly.
> * Later, we'll sample from those two mipmap levels and interpolate between
> them.
> */
> void
> @@ -923,6 +923,8 @@ lp_build_linear_mip_levels(struct lp_build_sample_context
> *bld,
> LLVMValueRef clamp_min;
> LLVMValueRef clamp_max;
>
> + assert(bld->num_lods == bld->num_mips);
> +
> first_level = bld->dynamic_state->first_level(bld->dynamic_state,
> bld->gallivm,
> texture_unit);
> last_level = bld->dynamic_state->last_level(bld->dynamic_state,
> @@ -1013,17 +1015,17 @@ lp_build_get_mip_offsets(struct
> lp_build_sample_context *bld,
> LLVMValueRef indexes[2], offsets, offset1;
>
> indexes[0] = lp_build_const_int32(bld->gallivm, 0);
> - if (bld->num_lods == 1) {
> + if (bld->num_mips == 1) {
> indexes[1] = level;
> offset1 = LLVMBuildGEP(builder, bld->mip_offsets, indexes, 2, "");
> offset1 = LLVMBuildLoad(builder, offset1, "");
> offsets = lp_build_broadcast_scalar(&bld->int_coord_bld, offset1);
> }
> - else if (bld->num_lods == bld->coord_bld.type.length / 4) {
> + else if (bld->num_mips == bld->coord_bld.type.length / 4) {
> unsigned i;
>
> offsets = bld->int_coord_bld.undef;
> - for (i = 0; i < bld->num_lods; i++) {
> + for (i = 0; i < bld->num_mips; i++) {
> LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
> LLVMValueRef indexo = lp_build_const_int32(bld->gallivm, 4 * i);
> indexes[1] = LLVMBuildExtractElement(builder, level, indexi, "");
> @@ -1036,10 +1038,10 @@ lp_build_get_mip_offsets(struct
> lp_build_sample_context *bld,
> else {
> unsigned i;
>
> - assert (bld->num_lods == bld->coord_bld.type.length);
> + assert (bld->num_mips == bld->coord_bld.type.length);
>
> offsets = bld->int_coord_bld.undef;
> - for (i = 0; i < bld->num_lods; i++) {
> + for (i = 0; i < bld->num_mips; i++) {
> LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
> indexes[1] = LLVMBuildExtractElement(builder, level, indexi, "");
> offset1 = LLVMBuildGEP(builder, bld->mip_offsets, indexes, 2, "");
> @@ -1089,18 +1091,18 @@ lp_build_get_level_stride_vec(struct
> lp_build_sample_context *bld,
> LLVMBuilderRef builder = bld->gallivm->builder;
> LLVMValueRef indexes[2], stride, stride1;
> indexes[0] = lp_build_const_int32(bld->gallivm, 0);
> - if (bld->num_lods == 1) {
> + if (bld->num_mips == 1) {
> indexes[1] = level;
> stride1 = LLVMBuildGEP(builder, stride_array, indexes, 2, "");
> stride1 = LLVMBuildLoad(builder, stride1, "");
> stride = lp_build_broadcast_scalar(&bld->int_coord_bld, stride1);
> }
> - else if (bld->num_lods == bld->coord_bld.type.length / 4) {
> + else if (bld->num_mips == bld->coord_bld.type.length / 4) {
> LLVMValueRef stride1;
> unsigned i;
>
> stride = bld->int_coord_bld.undef;
> - for (i = 0; i < bld->num_lods; i++) {
> + for (i = 0; i < bld->num_mips; i++) {
> LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
> LLVMValueRef indexo = lp_build_const_int32(bld->gallivm, 4 * i);
> indexes[1] = LLVMBuildExtractElement(builder, level, indexi, "");
> @@ -1114,7 +1116,7 @@ lp_build_get_level_stride_vec(struct
> lp_build_sample_context *bld,
> LLVMValueRef stride1;
> unsigned i;
>
> - assert (bld->num_lods == bld->coord_bld.type.length);
> + assert (bld->num_mips == bld->coord_bld.type.length);
>
> stride = bld->int_coord_bld.undef;
> for (i = 0; i < bld->coord_bld.type.length; i++) {
> @@ -1147,7 +1149,7 @@ lp_build_mipmap_level_sizes(struct
> lp_build_sample_context *bld,
> /*
> * Compute width, height, depth at mipmap level 'ilevel'
> */
> - if (bld->num_lods == 1) {
> + if (bld->num_mips == 1) {
> ilevel_vec = lp_build_broadcast_scalar(&bld->int_size_bld, ilevel);
> *out_size = lp_build_minify(&bld->int_size_bld, bld->int_size,
> ilevel_vec);
> }
> @@ -1157,7 +1159,7 @@ lp_build_mipmap_level_sizes(struct
> lp_build_sample_context *bld,
> unsigned num_quads = bld->coord_bld.type.length / 4;
> unsigned i;
>
> - if (bld->num_lods == num_quads) {
> + if (bld->num_mips == num_quads) {
> /*
> * XXX: this should be #ifndef SANE_INSTRUCTION_SET.
> * intel "forgot" the variable shift count instruction until avx2.
> @@ -1216,7 +1218,7 @@ lp_build_mipmap_level_sizes(struct
> lp_build_sample_context *bld,
> * For dims == 1 this will create [w0, w1, w2, w3, ...] vector.
> * For dims > 1 this will create [w0, h0, d0, _, w1, h1, d1, _, ...]
> vector.
> */
> - assert(bld->num_lods == bld->coord_bld.type.length);
> + assert(bld->num_mips == bld->coord_bld.type.length);
> if (bld->dims == 1) {
> assert(bld->int_size_in_bld.type.length == 1);
> int_size_vec = lp_build_broadcast_scalar(&bld->int_coord_bld,
> bld->int_size);
> }
> else {
> LLVMValueRef ilevel1;
> - for (i = 0; i < bld->num_lods; i++) {
> + for (i = 0; i < bld->num_mips; i++) {
> LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
> ilevel1 = lp_build_extract_broadcast(bld->gallivm,
> bld->int_coord_type,
>
> bld->int_size_in_bld.type,
> ilevel, indexi);
> @@ -1235,7 +1237,7 @@ lp_build_mipmap_level_sizes(struct
> lp_build_sample_context *bld,
> }
> *out_size = lp_build_concat(bld->gallivm, tmp,
> bld->int_size_in_bld.type,
> - bld->num_lods);
> + bld->num_mips);
> }
> }
> }
> @@ -1278,7 +1280,7 @@ lp_build_extract_image_sizes(struct
> lp_build_sample_context *bld,
> LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context);
> struct lp_type size_type = size_bld->type;
>
> - if (bld->num_lods == 1) {
> + if (bld->num_mips == 1) {
> *out_width = lp_build_extract_broadcast(bld->gallivm,
> size_type,
> coord_type,
> @@ -1305,7 +1307,7 @@ lp_build_extract_image_sizes(struct
> lp_build_sample_context *bld,
> if (dims == 1) {
> *out_width = size;
> }
> - else if (bld->num_lods == num_quads) {
> + else if (bld->num_mips == num_quads) {
> *out_width = lp_build_swizzle_scalar_aos(size_bld, size, 0, 4);
> if (dims >= 2) {
> *out_height = lp_build_swizzle_scalar_aos(size_bld, size, 1, 4);
> @@ -1315,7 +1317,7 @@ lp_build_extract_image_sizes(struct
> lp_build_sample_context *bld,
> }
> }
> else {
> - assert(bld->num_lods == bld->coord_type.length);
> + assert(bld->num_mips == bld->coord_type.length);
> *out_width = lp_build_pack_aos_scalars(bld->gallivm, size_type,
> coord_type, size, 0);
> if (dims >= 2) {
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.h
> b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
> index a7ebe7e..e6b9f30 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.h
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
> @@ -233,7 +233,10 @@ struct lp_build_sample_context
> /** SIMD vector width */
> unsigned vector_width;
>
> - /** number of lod values (valid are 1, length/4, length) */
> + /** number of mipmaps (valid are 1, length/4, length) */
> + unsigned num_mips;
> +
> + /** number of lod values (valid are 1, length/4, length) */
> unsigned num_lods;
>
> /** regular scalar float type */
> @@ -283,6 +286,14 @@ struct lp_build_sample_context
> struct lp_type leveli_type;
> struct lp_build_context leveli_bld;
>
> + /** Float lod type */
> + struct lp_type lodf_type;
> + struct lp_build_context lodf_bld;
> +
> + /** Int lod type */
> + struct lp_type lodi_type;
> + struct lp_build_context lodi_bld;
> +
> /* Common dynamic state values */
> LLVMValueRef row_stride_array;
> LLVMValueRef img_stride_array;
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
> b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
> index 7431388..c35b628 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
> @@ -1373,7 +1373,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context
> *bld,
> lp_build_mipmap_level_sizes(bld, ilevel0,
> &size0,
> &row_stride0_vec, &img_stride0_vec);
> - if (bld->num_lods == 1) {
> + if (bld->num_mips == 1) {
> data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
> }
> else {
> @@ -1422,8 +1422,8 @@ lp_build_sample_mipmap(struct lp_build_sample_context
> *bld,
>
> if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
> LLVMValueRef h16vec_scale = lp_build_const_vec(bld->gallivm,
> - bld->levelf_bld.type,
> 256.0);
> - LLVMTypeRef i32vec_type = bld->leveli_bld.vec_type;
> + bld->lodf_bld.type,
> 256.0);
> + LLVMTypeRef i32vec_type = bld->lodi_bld.vec_type;
> struct lp_build_if_state if_ctx;
> LLVMValueRef need_lerp;
> unsigned num_quads = bld->coord_bld.type.length / 4;
> @@ -1435,7 +1435,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context
> *bld,
> /* need_lerp = lod_fpart > 0 */
> if (bld->num_lods == 1) {
> need_lerp = LLVMBuildICmp(builder, LLVMIntSGT,
> - lod_fpart, bld->leveli_bld.zero,
> + lod_fpart, bld->lodi_bld.zero,
> "need_lerp");
> }
> else {
> @@ -1450,9 +1450,9 @@ lp_build_sample_mipmap(struct lp_build_sample_context
> *bld,
> * lod_fpart values have same sign.
> * We can however then skip the greater than comparison.
> */
> - lod_fpart = lp_build_max(&bld->leveli_bld, lod_fpart,
> - bld->leveli_bld.zero);
> - need_lerp = lp_build_any_true_range(&bld->leveli_bld,
> bld->num_lods, lod_fpart);
> + lod_fpart = lp_build_max(&bld->lodi_bld, lod_fpart,
> + bld->lodi_bld.zero);
> + need_lerp = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods,
> lod_fpart);
> }
>
> lp_build_if(&if_ctx, bld->gallivm, need_lerp);
> @@ -1465,7 +1465,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context
> *bld,
> lp_build_mipmap_level_sizes(bld, ilevel1,
> &size1,
> &row_stride1_vec, &img_stride1_vec);
> - if (bld->num_lods == 1) {
> + if (bld->num_mips == 1) {
> data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
> }
> else {
> @@ -1524,7 +1524,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context
> *bld,
> }
> else {
> unsigned num_chans_per_lod = 4 * bld->coord_type.length /
> bld->num_lods;
> - LLVMTypeRef tmp_vec_type = LLVMVectorType(u8n_bld.elem_type,
> bld->leveli_bld.type.length);
> + LLVMTypeRef tmp_vec_type = LLVMVectorType(u8n_bld.elem_type,
> bld->lodi_bld.type.length);
> LLVMValueRef shuffle[LP_MAX_VECTOR_LENGTH];
>
> /* Take the LSB of lod_fpart */
> @@ -1613,7 +1613,7 @@ lp_build_sample_aos(struct lp_build_sample_context
> *bld,
> * some max probably could hack up the weights in the linear
> * path with selects to work for nearest.
> */
> - if (bld->leveli_bld.type.length > 1)
> + if (bld->num_lods > 1)
> lod_positive = LLVMBuildExtractElement(builder, lod_positive,
>
> lp_build_const_int32(bld->gallivm,
> 0), "");
>
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
> b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
> index 8ad3b9f..c686d82 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
> @@ -1087,7 +1087,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context
> *bld,
> lp_build_mipmap_level_sizes(bld, ilevel0,
> &size0,
> &row_stride0_vec, &img_stride0_vec);
> - if (bld->num_lods == 1) {
> + if (bld->num_mips == 1) {
> data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
> }
> else {
> @@ -1123,7 +1123,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context
> *bld,
> /* need_lerp = lod_fpart > 0 */
> if (bld->num_lods == 1) {
> need_lerp = LLVMBuildFCmp(builder, LLVMRealUGT,
> - lod_fpart, bld->levelf_bld.zero,
> + lod_fpart, bld->lodf_bld.zero,
> "need_lerp");
> }
> else {
> @@ -1138,12 +1138,12 @@ lp_build_sample_mipmap(struct lp_build_sample_context
> *bld,
> * negative values which would screw up filtering if not all
> * lod_fpart values have same sign.
> */
> - lod_fpart = lp_build_max(&bld->levelf_bld, lod_fpart,
> - bld->levelf_bld.zero);
> - need_lerp = lp_build_compare(bld->gallivm, bld->levelf_bld.type,
> + lod_fpart = lp_build_max(&bld->lodf_bld, lod_fpart,
> + bld->lodf_bld.zero);
> + need_lerp = lp_build_compare(bld->gallivm, bld->lodf_bld.type,
> PIPE_FUNC_GREATER,
> - lod_fpart, bld->levelf_bld.zero);
> - need_lerp = lp_build_any_true_range(&bld->leveli_bld,
> bld->num_lods, need_lerp);
> + lod_fpart, bld->lodf_bld.zero);
> + need_lerp = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods,
> need_lerp);
> }
>
> lp_build_if(&if_ctx, bld->gallivm, need_lerp);
> @@ -1152,7 +1152,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context
> *bld,
> lp_build_mipmap_level_sizes(bld, ilevel1,
> &size1,
> &row_stride1_vec, &img_stride1_vec);
> - if (bld->num_lods == 1) {
> + if (bld->num_mips == 1) {
> data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
> }
> else {
> @@ -1178,7 +1178,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context
> *bld,
>
> if (bld->num_lods != bld->coord_type.length)
> lod_fpart = lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
> -
> bld->levelf_bld.type,
> +
> bld->lodf_bld.type,
>
> bld->texel_bld.type,
> lod_fpart);
>
> @@ -1312,8 +1312,14 @@ lp_build_sample_common(struct lp_build_sample_context
> *bld,
> mip_filter,
> &lod_ipart, lod_fpart, lod_pos_or_zero);
> } else {
> - lod_ipart = bld->leveli_bld.zero;
> - *lod_pos_or_zero = bld->leveli_bld.zero;
> + lod_ipart = bld->lodi_bld.zero;
> + *lod_pos_or_zero = bld->lodi_bld.zero;
> + }
> +
> + if (bld->num_lods != bld->num_mips) {
> + /* only makes sense if there's just a single mip level */
> + assert(bld->num_mips == 1);
> + lod_ipart = lp_build_extract_range(bld->gallivm, lod_ipart, 0, 1);
> }
>
> /*
> @@ -1641,7 +1647,7 @@ lp_build_sample_general(struct lp_build_sample_context
> *bld,
> * some max probably could hack up the weights in the linear
> * path with selects to work for nearest.
> */
> - if (bld->leveli_bld.type.length > 1)
> + if (bld->num_lods > 1)
> lod_positive = LLVMBuildExtractElement(builder, lod_positive,
>
> lp_build_const_int32(bld->gallivm,
> 0), "");
>
> @@ -1692,7 +1698,7 @@ lp_build_fetch_texel(struct lp_build_sample_context
> *bld,
> const LLVMValueRef *offsets,
> LLVMValueRef *colors_out)
> {
> - struct lp_build_context *perquadi_bld = &bld->leveli_bld;
> + struct lp_build_context *perquadi_bld = &bld->lodi_bld;
> struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
> unsigned dims = bld->dims, chan;
> unsigned target = bld->static_texture_state->target;
> @@ -1706,7 +1712,7 @@ lp_build_fetch_texel(struct lp_build_sample_context
> *bld,
> out_of_bounds = int_coord_bld->zero;
>
> if (explicit_lod && bld->static_texture_state->target != PIPE_BUFFER) {
> - if (bld->num_lods != int_coord_bld->type.length) {
> + if (bld->num_mips != int_coord_bld->type.length) {
> ilevel = lp_build_pack_aos_scalars(bld->gallivm,
> int_coord_bld->type,
> perquadi_bld->type,
> explicit_lod, 0);
> }
> @@ -1717,7 +1723,7 @@ lp_build_fetch_texel(struct lp_build_sample_context
> *bld,
> out_of_bound_ret_zero ? &out_of_bounds :
> NULL);
> }
> else {
> - assert(bld->num_lods == 1);
> + assert(bld->num_mips == 1);
> if (bld->static_texture_state->target != PIPE_BUFFER) {
> ilevel = bld->dynamic_state->first_level(bld->dynamic_state,
> bld->gallivm,
> texture_unit);
> }
> @@ -1856,7 +1862,7 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
> unsigned target = static_texture_state->target;
> unsigned dims = texture_dims(target);
> unsigned num_quads = type.length / 4;
> - unsigned mip_filter, i;
> + unsigned mip_filter, min_img_filter, mag_img_filter, i;
> struct lp_build_sample_context bld;
> struct lp_static_sampler_state derived_sampler_state =
> *static_sampler_state;
> LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
> @@ -1919,6 +1925,10 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
> debug_printf(" .min_mip_filter = %u\n",
> derived_sampler_state.min_mip_filter);
> }
>
> + min_img_filter = static_sampler_state->min_img_filter;
> + mag_img_filter = static_sampler_state->mag_img_filter;
> +
> +
> /*
> * This is all a bit complicated different paths are chosen for
> performance
> * reasons.
> @@ -1936,38 +1946,51 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
> /*
> * There are other situations where at least the multiple int lods could
> be
> * avoided like min and max lod being equal.
> - * XXX if num_lods == 1 (for multiple quads) the level bld contexts will
> still
> - * have length 4. Because lod_selector is always using per quad calcs in
> this
> - * case, but minification etc. don't need to bother. This is very brittle
> though
> - * e.g. num_lods might be 1 but still have multiple positive_lod values!
> */
> + bld.num_mips = bld.num_lods = 1;
> if (lod_property == LP_SAMPLER_LOD_PER_ELEMENT &&
> (explicit_lod || lod_bias ||
> - (derivs && static_texture_state->target != PIPE_TEXTURE_CUBE)) &&
> - ((is_fetch && target != PIPE_BUFFER) ||
> - (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)))
> - bld.num_lods = type.length;
> + (derivs && static_texture_state->target != PIPE_TEXTURE_CUBE))) {
> + if ((is_fetch && target != PIPE_BUFFER) ||
> + (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
> + bld.num_mips = type.length;
> + bld.num_lods = type.length;
> + }
> + else if (!is_fetch && min_img_filter != mag_img_filter) {
> + bld.num_mips = 1;
> + bld.num_lods = type.length;
> + }
> + }
> /* TODO: for true scalar_lod should only use 1 lod value */
> - else if ((is_fetch && explicit_lod && target != PIPE_BUFFER ) ||
> + else if ((is_fetch && explicit_lod && target != PIPE_BUFFER) ||
> (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
> + bld.num_mips = num_quads;
> bld.num_lods = num_quads;
> }
> - else {
> - bld.num_lods = 1;
> + else if (!is_fetch && min_img_filter != mag_img_filter) {
> + bld.num_mips = 1;
> + bld.num_lods = num_quads;
> }
>
> - bld.levelf_type = type;
> +
> + bld.lodf_type = type;
> /* we want native vector size to be able to use our intrinsics */
> if (bld.num_lods != type.length) {
> - bld.levelf_type.length = type.length > 4 ? ((type.length + 15) / 16) *
> 4 : 1;
> + /* TODO: this currently always has to be per-quad or per-element */
> + bld.lodf_type.length = type.length > 4 ? ((type.length + 15) / 16) * 4
> : 1;
> + }
> + bld.lodi_type = lp_int_type(bld.lodf_type);
> + bld.levelf_type = bld.lodf_type;
> + if (bld.num_mips == 1) {
> + bld.levelf_type.length = 1;
> }
> bld.leveli_type = lp_int_type(bld.levelf_type);
> bld.float_size_type = bld.float_size_in_type;
> /* Note: size vectors may not be native. They contain minified w/h/d/_
> values,
> * with per-element lod that is w0/h0/d0/_/w1/h1/d1_/... so up to 8x4f32
> */
> - if (bld.num_lods > 1) {
> - bld.float_size_type.length = bld.num_lods == type.length ?
> - bld.num_lods *
> bld.float_size_in_type.length :
> + if (bld.num_mips > 1) {
> + bld.float_size_type.length = bld.num_mips == type.length ?
> + bld.num_mips *
> bld.float_size_in_type.length :
> type.length;
> }
> bld.int_size_type = lp_int_type(bld.float_size_type);
> @@ -1984,6 +2007,8 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
> lp_build_context_init(&bld.texel_bld, gallivm, bld.texel_type);
> lp_build_context_init(&bld.levelf_bld, gallivm, bld.levelf_type);
> lp_build_context_init(&bld.leveli_bld, gallivm, bld.leveli_type);
> + lp_build_context_init(&bld.lodf_bld, gallivm, bld.lodf_type);
> + lp_build_context_init(&bld.lodi_bld, gallivm, bld.lodi_type);
>
> /* Get the dynamic state */
> tex_width = dynamic_state->width(dynamic_state, gallivm, texture_index);
> @@ -2071,16 +2096,6 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
> * (It should be faster if we'd support avx2)
> */
> if (num_quads == 1 || !use_aos) {
> -
> - if (num_quads > 1) {
> - if (mip_filter == PIPE_TEX_MIPFILTER_NONE) {
> - LLVMValueRef index0 = lp_build_const_int32(gallivm, 0);
> - /*
> - * This parameter is the same for all quads could probably
> simplify.
> - */
> - ilevel0 = LLVMBuildExtractElement(builder, ilevel0, index0,
> "");
> - }
> - }
> if (use_aos) {
> /* do sampling/filtering with fixed pt arithmetic */
> lp_build_sample_aos(&bld, sampler_index,
> @@ -2134,30 +2149,37 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
> bld4.int_size_in_type = lp_int_type(bld4.float_size_in_type);
> bld4.texel_type = bld.texel_type;
> bld4.texel_type.length = 4;
> - bld4.levelf_type = type4;
> - /* we want native vector size to be able to use our intrinsics */
> - bld4.levelf_type.length = 1;
> - bld4.leveli_type = lp_int_type(bld4.levelf_type);
>
> + bld4.num_mips = bld4.num_lods = 1;
> if (lod_property == LP_SAMPLER_LOD_PER_ELEMENT &&
> - (explicit_lod || lod_bias ||
> - (derivs && static_texture_state->target !=
> PIPE_TEXTURE_CUBE)) &&
> - ((is_fetch && target != PIPE_BUFFER) ||
> - (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)))
> - bld4.num_lods = type4.length;
> - else
> - bld4.num_lods = 1;
> + (explicit_lod || lod_bias ||
> + (derivs && static_texture_state->target !=
> PIPE_TEXTURE_CUBE))) {
> + if ((is_fetch && target != PIPE_BUFFER) ||
> + (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
> + bld4.num_mips = type4.length;
> + bld4.num_lods = type4.length;
> + }
> + else if (!is_fetch && min_img_filter != mag_img_filter) {
> + bld4.num_mips = 1;
> + bld4.num_lods = type4.length;
> + }
> + }
>
> - bld4.levelf_type = type4;
> /* we want native vector size to be able to use our intrinsics */
> + bld4.lodf_type = type4;
> if (bld4.num_lods != type4.length) {
> + bld4.lodf_type.length = 1;
> + }
> + bld4.lodi_type = lp_int_type(bld4.lodf_type);
> + bld4.levelf_type = type4;
> + if (bld4.num_mips != type4.length) {
> bld4.levelf_type.length = 1;
> }
> bld4.leveli_type = lp_int_type(bld4.levelf_type);
> bld4.float_size_type = bld4.float_size_in_type;
> - if (bld4.num_lods > 1) {
> - bld4.float_size_type.length = bld4.num_lods == type4.length ?
> - bld4.num_lods *
> bld4.float_size_in_type.length :
> + if (bld4.num_mips > 1) {
> + bld4.float_size_type.length = bld4.num_mips == type4.length ?
> + bld4.num_mips *
> bld4.float_size_in_type.length :
> type4.length;
> }
> bld4.int_size_type = lp_int_type(bld4.float_size_type);
> @@ -2174,6 +2196,8 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
> lp_build_context_init(&bld4.texel_bld, gallivm, bld4.texel_type);
> lp_build_context_init(&bld4.levelf_bld, gallivm, bld4.levelf_type);
> lp_build_context_init(&bld4.leveli_bld, gallivm, bld4.leveli_type);
> + lp_build_context_init(&bld4.lodf_bld, gallivm, bld4.lodf_type);
> + lp_build_context_init(&bld4.lodi_bld, gallivm, bld4.lodi_type);
>
> for (i = 0; i < num_quads; i++) {
> LLVMValueRef s4, t4, r4;
> @@ -2196,7 +2220,8 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
> }
> }
> lod_positive4 = lp_build_extract_range(gallivm, lod_positive,
> num_lods * i, num_lods);
> - ilevel04 = lp_build_extract_range(gallivm, ilevel0, num_lods *
> i, num_lods);
> + ilevel04 = bld.num_mips == 1 ? ilevel0 :
> + lp_build_extract_range(gallivm, ilevel0, num_lods
> * i, num_lods);
> if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
> ilevel14 = lp_build_extract_range(gallivm, ilevel1, num_lods
> * i, num_lods);
> lod_fpart4 = lp_build_extract_range(gallivm, lod_fpart,
> num_lods * i, num_lods);
> --
> 1.7.9.5
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev
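
For readers skimming the archive, the num_mips/num_lods split that the commit
message describes boils down to the selection logic below. This is an
illustrative, simplified sketch only: the stand-alone helper, its flag
parameters and the lod_counts struct are hypothetical and not part of the
patch; the real code in lp_build_sample_soa() tests explicit_lod, lod_bias,
derivs and is_fetch directly.

/*
 * Sketch (not part of the patch): how many mip level indexes (num_mips)
 * and how many lod values (num_lods) are tracked, following the hunks in
 * lp_bld_sample_soa.c quoted above.
 */
struct lod_counts {
   unsigned num_mips;   /* mip level indexes: 1, length/4 or length */
   unsigned num_lods;   /* lod values: 1, length/4 or length */
};

static struct lod_counts
choose_lod_counts(unsigned vec_length,      /* type.length */
                  unsigned num_quads,       /* vec_length / 4 */
                  int lod_per_element,      /* per-element lod requested */
                  int need_mip_lookup,      /* mip filter or explicit-lod fetch */
                  int min_mag_differ)       /* min_img_filter != mag_img_filter */
{
   struct lod_counts c = { 1, 1 };
   if (lod_per_element && need_mip_lookup) {
      /* per-element lod and per-element mip level */
      c.num_mips = c.num_lods = vec_length;
   }
   else if (lod_per_element && min_mag_differ) {
      /* no mip lookup needed, but min/mag decision is per element */
      c.num_mips = 1;
      c.num_lods = vec_length;
   }
   else if (need_mip_lookup) {
      /* per-quad lod and per-quad mip level */
      c.num_mips = c.num_lods = num_quads;
   }
   else if (min_mag_differ) {
      /* no mip lookup needed, min/mag decision per quad */
      c.num_mips = 1;
      c.num_lods = num_quads;
   }
   return c;
}

The key point is that num_mips can stay 1 (a single mip level) while num_lods
still goes per-quad or per-element, which is what later allows a per-pixel or
per-quad min/mag filter decision without pretending there are multiple mip
levels.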