I only skimmed it, but it looks good in principle. Jose
----- Original Message ----- > From: Roland Scheidegger <srol...@vmware.com> > > Previously, the derivatives were calculated and passed in a packed form > to the sample code (for implicit derivatives; explicit derivatives were > packed to the same format). > There are several reasons why this wasn't such a good idea: > 1) the derivatives may not even be needed (not as bad as it sounds since > LLVM will just throw the calculations needed for them away, but still) > 2) the special packing format really shouldn't be part of the sampler > interface > 3) depending on what the sample code actually does, the derivatives will > be processed differently, hence there is no "ideal" packing. For cube > maps with explicit derivatives (which we don't do yet), for instance, the > packing looked downright useless, and for non-isotropic filtering we'd > need different calculations too. > > So, instead just pass the derivatives as is (for explicit derivatives), > or let the rho-calculating sample code calculate them itself. This still > does exactly the same packing stuff for implicit derivatives for now, > though explicit ones are handled in a more straightforward manner (quick > estimates show performance should be quite similar, though it is much > easier to follow and also does the rho calculation per-pixel until the > end, which we eventually need for spec compliance anyway). > > No piglit changes. 
> --- > src/gallium/auxiliary/gallivm/lp_bld_quad.c | 14 +- > src/gallium/auxiliary/gallivm/lp_bld_sample.c | 271 > +++++++++++++-------- > src/gallium/auxiliary/gallivm/lp_bld_sample.h | 6 +- > src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c | 11 +- > src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c | 21 +- > src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c | 122 +--------- > 6 files changed, 196 insertions(+), 249 deletions(-) > > diff --git a/src/gallium/auxiliary/gallivm/lp_bld_quad.c > b/src/gallium/auxiliary/gallivm/lp_bld_quad.c > index 8a0efed..1955add 100644 > --- a/src/gallium/auxiliary/gallivm/lp_bld_quad.c > +++ b/src/gallium/auxiliary/gallivm/lp_bld_quad.c > @@ -79,14 +79,9 @@ lp_build_ddy(struct lp_build_context *bld, > } > > /* > - * To be able to handle multiple quads at once in texture sampling and > - * do lod calculations per quad, it is necessary to get the per-quad > - * derivatives into the lp_build_rho function. > - * For 8-wide vectors the packed derivative values for 3 coords would > - * look like this, this scales to a arbitrary (multiple of 4) vector size: > - * ds1dx ds1dy dt1dx dt1dy ds2dx ds2dy dt2dx dt2dy > + * Helper for building packed ddx/ddy vector for one coord (scalar per quad > + * values). The vector will look like this (8-wide): > * dr1dx dr1dy _____ _____ dr2dx dr2dy _____ _____ > - * The second vector will be unused for 1d and 2d textures. > */ > LLVMValueRef > lp_build_packed_ddx_ddy_onecoord(struct lp_build_context *bld, > @@ -121,6 +116,11 @@ lp_build_packed_ddx_ddy_onecoord(struct lp_build_context > *bld, > } > > > +/* > + * Helper for building packed ddx/ddy vector for one coord (scalar per quad > + * values). 
The vector will look like this (8-wide): > + * ds1dx ds1dy dt1dx dt1dy ds2dx ds2dy dt2dx dt2dy > + */ > LLVMValueRef > lp_build_packed_ddx_ddy_twocoord(struct lp_build_context *bld, > LLVMValueRef a, LLVMValueRef b) > diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c > b/src/gallium/auxiliary/gallivm/lp_bld_sample.c > index ef0631c..fc8bae7 100644 > --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c > +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c > @@ -46,6 +46,7 @@ > #include "lp_bld_type.h" > #include "lp_bld_logic.h" > #include "lp_bld_pack.h" > +#include "lp_bld_quad.h" > > > /* > @@ -203,6 +204,9 @@ lp_sampler_static_sampler_state(struct > lp_static_sampler_state *state, > static LLVMValueRef > lp_build_rho(struct lp_build_sample_context *bld, > unsigned texture_unit, > + LLVMValueRef s, > + LLVMValueRef t, > + LLVMValueRef r, > const struct lp_derivatives *derivs) > { > struct gallivm_state *gallivm = bld->gallivm; > @@ -211,8 +215,8 @@ lp_build_rho(struct lp_build_sample_context *bld, > struct lp_build_context *float_bld = &bld->float_bld; > struct lp_build_context *coord_bld = &bld->coord_bld; > struct lp_build_context *perquadf_bld = &bld->perquadf_bld; > - const LLVMValueRef *ddx_ddy = derivs->ddx_ddy; > const unsigned dims = bld->dims; > + LLVMValueRef ddx_ddy[2]; > LLVMBuilderRef builder = bld->gallivm->builder; > LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context); > LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0); > @@ -229,59 +233,7 @@ lp_build_rho(struct lp_build_sample_context *bld, > LLVMValueRef i32undef = > LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context)); > LLVMValueRef rho_xvec, rho_yvec; > > - abs_ddx_ddy[0] = lp_build_abs(coord_bld, ddx_ddy[0]); > - if (dims > 2) { > - abs_ddx_ddy[1] = lp_build_abs(coord_bld, ddx_ddy[1]); > - } > - else { > - abs_ddx_ddy[1] = NULL; > - } > - > - if (dims == 1) { > - static const unsigned char swizzle1[] = { > - 0, LP_BLD_SWIZZLE_DONTCARE, > - LP_BLD_SWIZZLE_DONTCARE, 
LP_BLD_SWIZZLE_DONTCARE > - }; > - static const unsigned char swizzle2[] = { > - 1, LP_BLD_SWIZZLE_DONTCARE, > - LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE > - }; > - rho_xvec = lp_build_swizzle_aos(coord_bld, abs_ddx_ddy[0], swizzle1); > - rho_yvec = lp_build_swizzle_aos(coord_bld, abs_ddx_ddy[0], swizzle2); > - } > - else if (dims == 2) { > - static const unsigned char swizzle1[] = { > - 0, 2, > - LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE > - }; > - static const unsigned char swizzle2[] = { > - 1, 3, > - LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE > - }; > - rho_xvec = lp_build_swizzle_aos(coord_bld, abs_ddx_ddy[0], swizzle1); > - rho_yvec = lp_build_swizzle_aos(coord_bld, abs_ddx_ddy[0], swizzle2); > - } > - else { > - LLVMValueRef shuffles1[LP_MAX_VECTOR_LENGTH]; > - LLVMValueRef shuffles2[LP_MAX_VECTOR_LENGTH]; > - assert(dims == 3); > - for (i = 0; i < num_quads; i++) { > - shuffles1[4*i + 0] = lp_build_const_int32(gallivm, 4*i); > - shuffles1[4*i + 1] = lp_build_const_int32(gallivm, 4*i + 2); > - shuffles1[4*i + 2] = lp_build_const_int32(gallivm, length + 4*i); > - shuffles1[4*i + 3] = i32undef; > - shuffles2[4*i + 0] = lp_build_const_int32(gallivm, 4*i + 1); > - shuffles2[4*i + 1] = lp_build_const_int32(gallivm, 4*i + 3); > - shuffles2[4*i + 2] = lp_build_const_int32(gallivm, length + 4*i + > 1); > - shuffles2[4*i + 3] = i32undef; > - } > - rho_xvec = LLVMBuildShuffleVector(builder, abs_ddx_ddy[0], > abs_ddx_ddy[1], > - LLVMConstVector(shuffles1, length), > ""); > - rho_yvec = LLVMBuildShuffleVector(builder, abs_ddx_ddy[0], > abs_ddx_ddy[1], > - LLVMConstVector(shuffles2, length), > ""); > - } > - > - rho_vec = lp_build_max(coord_bld, rho_xvec, rho_yvec); > + /* Note that all simplified calculations will only work for isotropic > filtering */ > > first_level = bld->dynamic_state->first_level(bld->dynamic_state, > bld->gallivm, > texture_unit); > @@ -289,76 +241,176 @@ lp_build_rho(struct lp_build_sample_context *bld, > int_size = 
lp_build_minify(int_size_bld, bld->int_size, first_level_vec); > float_size = lp_build_int_to_float(float_size_bld, int_size); > > - if (bld->coord_type.length > 4) { > - /* expand size to each quad */ > + /* XXX ignoring explicit derivs for cube maps for now */ > + if (derivs && !(bld->static_texture_state->target == PIPE_TEXTURE_CUBE)) > { > + LLVMValueRef ddmax[3]; > + for (i = 0; i < dims; i++) { > + LLVMValueRef ddx, ddy; > + LLVMValueRef floatdim; > + LLVMValueRef indexi = lp_build_const_int32(gallivm, i); > + ddx = lp_build_abs(coord_bld, derivs->ddx[i]); > + ddy = lp_build_abs(coord_bld, derivs->ddy[i]); > + ddmax[i] = lp_build_max(coord_bld, ddx, ddy); > + floatdim = lp_build_extract_broadcast(gallivm, > bld->float_size_in_type, > + coord_bld->type, float_size, > indexi); > + ddmax[i] = lp_build_mul(coord_bld, floatdim, ddmax[i]); > + } > + rho_vec = ddmax[0]; > if (dims > 1) { > - /* could use some broadcast_vector helper for this? */ > - int num_quads = bld->coord_type.length / 4; > - LLVMValueRef src[LP_MAX_VECTOR_LENGTH/4]; > - for (i = 0; i < num_quads; i++) { > - src[i] = float_size; > + rho_vec = lp_build_max(coord_bld, rho_vec, ddmax[1]); > + if (dims > 2) { > + rho_vec = lp_build_max(coord_bld, rho_vec, ddmax[2]); > + } > + } > + /* > + * rho_vec now still contains per-pixel rho, convert to scalar per > quad > + * since we can't handle per-pixel rho/lod from now on (TODO). > + */ > + rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type, > + perquadf_bld->type, rho_vec, 0); > + } > + else { > + /* > + * This looks all a bit complex, but it's not that bad > + * (the shuffle code makes it look worse than it is). > + * Still, might not be ideal for all cases. 
> + */ > + if (dims < 2) { > + ddx_ddy[0] = lp_build_packed_ddx_ddy_onecoord(coord_bld, s); > + } > + else if (dims >= 2) { > + ddx_ddy[0] = lp_build_packed_ddx_ddy_twocoord(coord_bld, > + s, t); > + if (dims > 2) { > + ddx_ddy[1] = lp_build_packed_ddx_ddy_onecoord(coord_bld, r); > } > - float_size = lp_build_concat(bld->gallivm, src, > float_size_bld->type, num_quads); > + } > + > + abs_ddx_ddy[0] = lp_build_abs(coord_bld, ddx_ddy[0]); > + if (dims > 2) { > + abs_ddx_ddy[1] = lp_build_abs(coord_bld, ddx_ddy[1]); > } > else { > - float_size = lp_build_broadcast_scalar(coord_bld, float_size); > + abs_ddx_ddy[1] = NULL; > } > - rho_vec = lp_build_mul(coord_bld, rho_vec, float_size); > > - if (dims <= 1) { > - rho = rho_vec; > + if (dims == 1) { > + static const unsigned char swizzle1[] = { > + 0, LP_BLD_SWIZZLE_DONTCARE, > + LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE > + }; > + static const unsigned char swizzle2[] = { > + 1, LP_BLD_SWIZZLE_DONTCARE, > + LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE > + }; > + rho_xvec = lp_build_swizzle_aos(coord_bld, abs_ddx_ddy[0], > swizzle1); > + rho_yvec = lp_build_swizzle_aos(coord_bld, abs_ddx_ddy[0], > swizzle2); > + } > + else if (dims == 2) { > + static const unsigned char swizzle1[] = { > + 0, 2, > + LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE > + }; > + static const unsigned char swizzle2[] = { > + 1, 3, > + LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE > + }; > + rho_xvec = lp_build_swizzle_aos(coord_bld, abs_ddx_ddy[0], > swizzle1); > + rho_yvec = lp_build_swizzle_aos(coord_bld, abs_ddx_ddy[0], > swizzle2); > } > else { > - if (dims >= 2) { > - static const unsigned char swizzle1[] = { > - 0, LP_BLD_SWIZZLE_DONTCARE, > - LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE > - }; > - static const unsigned char swizzle2[] = { > - 1, LP_BLD_SWIZZLE_DONTCARE, > - LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE > - }; > - LLVMValueRef rho_s, rho_t, rho_r; > - > - rho_s = lp_build_swizzle_aos(coord_bld, 
rho_vec, swizzle1); > - rho_t = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle2); > - > - rho = lp_build_max(coord_bld, rho_s, rho_t); > - > - if (dims >= 3) { > - static const unsigned char swizzle3[] = { > - 2, LP_BLD_SWIZZLE_DONTCARE, > + LLVMValueRef shuffles1[LP_MAX_VECTOR_LENGTH]; > + LLVMValueRef shuffles2[LP_MAX_VECTOR_LENGTH]; > + assert(dims == 3); > + for (i = 0; i < num_quads; i++) { > + shuffles1[4*i + 0] = lp_build_const_int32(gallivm, 4*i); > + shuffles1[4*i + 1] = lp_build_const_int32(gallivm, 4*i + 2); > + shuffles1[4*i + 2] = lp_build_const_int32(gallivm, length + > 4*i); > + shuffles1[4*i + 3] = i32undef; > + shuffles2[4*i + 0] = lp_build_const_int32(gallivm, 4*i + 1); > + shuffles2[4*i + 1] = lp_build_const_int32(gallivm, 4*i + 3); > + shuffles2[4*i + 2] = lp_build_const_int32(gallivm, length + 4*i > + 1); > + shuffles2[4*i + 3] = i32undef; > + } > + rho_xvec = LLVMBuildShuffleVector(builder, abs_ddx_ddy[0], > abs_ddx_ddy[1], > + LLVMConstVector(shuffles1, > length), ""); > + rho_yvec = LLVMBuildShuffleVector(builder, abs_ddx_ddy[0], > abs_ddx_ddy[1], > + LLVMConstVector(shuffles2, > length), ""); > + } > + > + rho_vec = lp_build_max(coord_bld, rho_xvec, rho_yvec); > + > + if (bld->coord_type.length > 4) { > + /* expand size to each quad */ > + if (dims > 1) { > + /* could use some broadcast_vector helper for this? 
*/ > + int num_quads = bld->coord_type.length / 4; > + LLVMValueRef src[LP_MAX_VECTOR_LENGTH/4]; > + for (i = 0; i < num_quads; i++) { > + src[i] = float_size; > + } > + float_size = lp_build_concat(bld->gallivm, src, > float_size_bld->type, num_quads); > + } > + else { > + float_size = lp_build_broadcast_scalar(coord_bld, float_size); > + } > + rho_vec = lp_build_mul(coord_bld, rho_vec, float_size); > + > + if (dims <= 1) { > + rho = rho_vec; > + } > + else { > + if (dims >= 2) { > + static const unsigned char swizzle1[] = { > + 0, LP_BLD_SWIZZLE_DONTCARE, > LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE > }; > - rho_r = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle3); > - rho = lp_build_max(coord_bld, rho, rho_r); > + static const unsigned char swizzle2[] = { > + 1, LP_BLD_SWIZZLE_DONTCARE, > + LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE > + }; > + LLVMValueRef rho_s, rho_t, rho_r; > + > + rho_s = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle1); > + rho_t = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle2); > + > + rho = lp_build_max(coord_bld, rho_s, rho_t); > + > + if (dims >= 3) { > + static const unsigned char swizzle3[] = { > + 2, LP_BLD_SWIZZLE_DONTCARE, > + LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE > + }; > + rho_r = lp_build_swizzle_aos(coord_bld, rho_vec, > swizzle3); > + rho = lp_build_max(coord_bld, rho, rho_r); > + } > } > } > - } > - rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type, > - perquadf_bld->type, rho, 0); > - } > - else { > - if (dims <= 1) { > - rho_vec = LLVMBuildExtractElement(builder, rho_vec, index0, ""); > - } > - rho_vec = lp_build_mul(float_size_bld, rho_vec, float_size); > - > - if (dims <= 1) { > - rho = rho_vec; > + rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type, > + perquadf_bld->type, rho, 0); > } > else { > - if (dims >= 2) { > - LLVMValueRef rho_s, rho_t, rho_r; > + if (dims <= 1) { > + rho_vec = LLVMBuildExtractElement(builder, rho_vec, index0, ""); > + } > + rho_vec = 
lp_build_mul(float_size_bld, rho_vec, float_size); > + > + if (dims <= 1) { > + rho = rho_vec; > + } > + else { > + if (dims >= 2) { > + LLVMValueRef rho_s, rho_t, rho_r; > > - rho_s = LLVMBuildExtractElement(builder, rho_vec, index0, ""); > - rho_t = LLVMBuildExtractElement(builder, rho_vec, index1, ""); > + rho_s = LLVMBuildExtractElement(builder, rho_vec, index0, > ""); > + rho_t = LLVMBuildExtractElement(builder, rho_vec, index1, > ""); > > - rho = lp_build_max(float_bld, rho_s, rho_t); > + rho = lp_build_max(float_bld, rho_s, rho_t); > > - if (dims >= 3) { > - rho_r = LLVMBuildExtractElement(builder, rho_vec, index2, > ""); > - rho = lp_build_max(float_bld, rho, rho_r); > + if (dims >= 3) { > + rho_r = LLVMBuildExtractElement(builder, rho_vec, index2, > ""); > + rho = lp_build_max(float_bld, rho, rho_r); > + } > } > } > } > @@ -511,6 +563,9 @@ void > lp_build_lod_selector(struct lp_build_sample_context *bld, > unsigned texture_unit, > unsigned sampler_unit, > + LLVMValueRef s, > + LLVMValueRef t, > + LLVMValueRef r, > const struct lp_derivatives *derivs, > LLVMValueRef lod_bias, /* optional */ > LLVMValueRef explicit_lod, /* optional */ > @@ -544,7 +599,7 @@ lp_build_lod_selector(struct lp_build_sample_context > *bld, > else { > LLVMValueRef rho; > > - rho = lp_build_rho(bld, texture_unit, derivs); > + rho = lp_build_rho(bld, texture_unit, s, t, r, derivs); > > /* > * Compute lod = log2(rho) > diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.h > b/src/gallium/auxiliary/gallivm/lp_bld_sample.h > index 6306455..1abe0ca 100644 > --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.h > +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.h > @@ -56,7 +56,8 @@ struct lp_build_context; > */ > struct lp_derivatives > { > - LLVMValueRef ddx_ddy[2]; > + LLVMValueRef ddx[3]; > + LLVMValueRef ddy[3]; > }; > > > @@ -366,6 +367,9 @@ void > lp_build_lod_selector(struct lp_build_sample_context *bld, > unsigned texture_index, > unsigned sampler_index, > + LLVMValueRef s, > 
+ LLVMValueRef t, > + LLVMValueRef r, > const struct lp_derivatives *derivs, > LLVMValueRef lod_bias, /* optional */ > LLVMValueRef explicit_lod, /* optional */ > diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c > b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c > index 8aa4166..cdd910f 100644 > --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c > +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c > @@ -1077,7 +1077,7 @@ lp_build_sample_common(struct lp_build_sample_context > *bld, > LLVMValueRef *s, > LLVMValueRef *t, > LLVMValueRef *r, > - const struct lp_derivatives *derivs, > + const struct lp_derivatives *derivs, /* optional */ > LLVMValueRef lod_bias, /* optional */ > LLVMValueRef explicit_lod, /* optional */ > LLVMValueRef *lod_ipart, > @@ -1090,7 +1090,6 @@ lp_build_sample_common(struct lp_build_sample_context > *bld, > const unsigned mag_filter = bld->static_sampler_state->mag_img_filter; > const unsigned target = bld->static_texture_state->target; > LLVMValueRef first_level; > - struct lp_derivatives face_derivs; > > /* > printf("%s mip %d min %d mag %d\n", __FUNCTION__, > @@ -1107,11 +1106,6 @@ lp_build_sample_common(struct lp_build_sample_context > *bld, > *t = face_t; /* vec */ > /* use 'r' to indicate cube face */ > *r = face; /* vec */ > - > - /* recompute ddx, ddy using the new (s,t) face texcoords */ > - face_derivs.ddx_ddy[0] = > lp_build_packed_ddx_ddy_twocoord(&bld->coord_bld, *s, *t); > - face_derivs.ddx_ddy[1] = NULL; > - derivs = &face_derivs; > } > else if (target == PIPE_TEXTURE_1D_ARRAY) { > *r = lp_build_iround(&bld->coord_bld, *t); > @@ -1131,6 +1125,7 @@ lp_build_sample_common(struct lp_build_sample_context > *bld, > * distinguish between minification/magnification with one mipmap > level. 
> */ > lp_build_lod_selector(bld, texture_index, sampler_index, > + *s, *t, *r, > derivs, lod_bias, explicit_lod, > mip_filter, > lod_ipart, lod_fpart); > @@ -1479,7 +1474,7 @@ lp_build_sample_soa(struct gallivm_state *gallivm, > unsigned sampler_index, > const LLVMValueRef *coords, > const LLVMValueRef *offsets, > - const struct lp_derivatives *derivs, > + const struct lp_derivatives *derivs, /* optional */ > LLVMValueRef lod_bias, /* optional */ > LLVMValueRef explicit_lod, /* optional */ > LLVMValueRef texel_out[4]) > diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c > b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c > index 9a30cc8..98bce0e 100644 > --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c > +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c > @@ -363,7 +363,7 @@ emit_tex(struct lp_build_tgsi_aos_context *bld, > unsigned target; > unsigned unit; > LLVMValueRef coords; > - struct lp_derivatives derivs; > + struct lp_derivatives derivs = { {NULL}, {NULL} }; > > if (!bld->sampler) { > _debug_printf("warning: found texture instruction but no sampler > generator supplied\n"); > @@ -374,22 +374,15 @@ emit_tex(struct lp_build_tgsi_aos_context *bld, > > coords = lp_build_emit_fetch( &bld->bld_base, inst, 0 , LP_CHAN_ALL); > > - if (0 && modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) { > - lp_build_emit_fetch( &bld->bld_base, inst, 1 , LP_CHAN_ALL); > - lp_build_emit_fetch( &bld->bld_base, inst, 2 , LP_CHAN_ALL); > + if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) { > + /* probably not going to work */ > + derivs.ddx[0] = lp_build_emit_fetch( &bld->bld_base, inst, 1 , > LP_CHAN_ALL); > + derivs.ddy[0] = lp_build_emit_fetch( &bld->bld_base, inst, 2 , > LP_CHAN_ALL); > unit = inst->Src[3].Register.Index; > - } else { > -#if 0 > - ddx = lp_build_ddx( &bld->bld_base.base, coords ); > - ddy = lp_build_ddy( &bld->bld_base.base, coords ); > -#else > - /* TODO */ > - derivs.ddx_ddy[0] = bld->bld_base.base.one; > - derivs.ddx_ddy[1] = 
bld->bld_base.base.one; > -#endif > + } > + else { > unit = inst->Src[1].Register.Index; > } > - > return bld->sampler->emit_fetch_texel(bld->sampler, > &bld->bld_base.base, > target, unit, > diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c > b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c > index 69957fe..9fe87c4 100644 > --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c > +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c > @@ -1164,14 +1164,13 @@ emit_tex( struct lp_build_tgsi_soa_context *bld, > enum lp_build_tex_modifier modifier, > LLVMValueRef *texel) > { > - LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; > - struct gallivm_state *gallivm = bld->bld_base.base.gallivm; > unsigned unit; > LLVMValueRef lod_bias, explicit_lod; > LLVMValueRef oow = NULL; > LLVMValueRef coords[4]; > LLVMValueRef offsets[3] = { NULL }; > struct lp_derivatives derivs; > + struct lp_derivatives *deriv_ptr = NULL; > unsigned num_coords; > unsigned dims; > unsigned i; > @@ -1184,9 +1183,6 @@ emit_tex( struct lp_build_tgsi_soa_context *bld, > return; > } > > - derivs.ddx_ddy[0] = bld->bld_base.base.undef; > - derivs.ddx_ddy[1] = bld->bld_base.base.undef; > - > switch (inst->Texture.Texture) { > case TGSI_TEXTURE_1D: > num_coords = 1; > @@ -1259,58 +1255,14 @@ emit_tex( struct lp_build_tgsi_soa_context *bld, > } > > if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) { > - LLVMValueRef i32undef = > LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context)); > - LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH]; > - LLVMValueRef ddxdyonec[3]; > - unsigned length = bld->bld_base.base.type.length; > - unsigned num_quads = length / 4; > unsigned dim; > - unsigned quad; > - > for (dim = 0; dim < dims; ++dim) { > - LLVMValueRef srcx = lp_build_emit_fetch( &bld->bld_base, inst, 1, > dim ); > - LLVMValueRef srcy = lp_build_emit_fetch( &bld->bld_base, inst, 2, > dim ); > - for (quad = 0; quad < num_quads; ++quad) { > - unsigned s1 = 4*quad; > - unsigned s2 = 4*quad + 
length; > - shuffles[4*quad + 0] = lp_build_const_int32(gallivm, s1); > - shuffles[4*quad + 1] = lp_build_const_int32(gallivm, s2); > - shuffles[4*quad + 2] = i32undef; > - shuffles[4*quad + 3] = i32undef; > - } > - ddxdyonec[dim] = LLVMBuildShuffleVector(builder, srcx, srcy, > - LLVMConstVector(shuffles, > length), ""); > - } > - if (dims == 1) { > - derivs.ddx_ddy[0] = ddxdyonec[0]; > - } > - else if (dims >= 2) { > - for (quad = 0; quad < num_quads; ++quad) { > - unsigned s1 = 4*quad; > - unsigned s2 = 4*quad + length; > - shuffles[4*quad + 0] = lp_build_const_int32(gallivm, s1); > - shuffles[4*quad + 1] = lp_build_const_int32(gallivm, s1 + 1); > - shuffles[4*quad + 2] = lp_build_const_int32(gallivm, s2); > - shuffles[4*quad + 3] = lp_build_const_int32(gallivm, s2 + 1); > - } > - derivs.ddx_ddy[0] = LLVMBuildShuffleVector(builder, ddxdyonec[0], > ddxdyonec[1], > - LLVMConstVector(shuffles, > length), ""); > - if (dims == 3) { > - derivs.ddx_ddy[1] = ddxdyonec[2]; > - } > + derivs.ddx[dim] = lp_build_emit_fetch( &bld->bld_base, inst, 1, dim > ); > + derivs.ddy[dim] = lp_build_emit_fetch( &bld->bld_base, inst, 2, dim > ); > } > + deriv_ptr = &derivs; > unit = inst->Src[3].Register.Index; > } else { > - if (dims == 1) { > - derivs.ddx_ddy[0] = > lp_build_packed_ddx_ddy_onecoord(&bld->bld_base.base, coords[0]); > - } > - else if (dims >= 2) { > - derivs.ddx_ddy[0] = > lp_build_packed_ddx_ddy_twocoord(&bld->bld_base.base, > - coords[0], > coords[1]); > - if (dims == 3) { > - derivs.ddx_ddy[1] = > lp_build_packed_ddx_ddy_onecoord(&bld->bld_base.base, coords[2]); > - } > - } > unit = inst->Src[1].Register.Index; > } > > @@ -1329,7 +1281,7 @@ emit_tex( struct lp_build_tgsi_soa_context *bld, > unit, unit, > coords, > offsets, > - &derivs, > + deriv_ptr, > lod_bias, explicit_lod, > texel); > } > @@ -1341,13 +1293,13 @@ emit_sample(struct lp_build_tgsi_soa_context *bld, > boolean compare, > LLVMValueRef *texel) > { > - LLVMBuilderRef builder = 
bld->bld_base.base.gallivm->builder; > struct gallivm_state *gallivm = bld->bld_base.base.gallivm; > unsigned texture_unit, sampler_unit; > LLVMValueRef lod_bias, explicit_lod; > LLVMValueRef coords[4]; > LLVMValueRef offsets[3] = { NULL }; > struct lp_derivatives derivs; > + struct lp_derivatives *deriv_ptr = NULL; > unsigned num_coords, dims; > unsigned i; > > @@ -1366,9 +1318,6 @@ emit_sample(struct lp_build_tgsi_soa_context *bld, > texture_unit = inst->Src[1].Register.Index; > sampler_unit = inst->Src[2].Register.Index; > > - derivs.ddx_ddy[0] = bld->bld_base.base.undef; > - derivs.ddx_ddy[1] = bld->bld_base.base.undef; > - > /* > * Note inst->Texture.Texture will contain the number of offsets, > * however the target information is NOT there and comes from the > @@ -1449,57 +1398,12 @@ emit_sample(struct lp_build_tgsi_soa_context *bld, > } > > if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) { > - LLVMValueRef i32undef = > LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context)); > - LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH]; > - LLVMValueRef ddxdyonec[3]; > - unsigned length = bld->bld_base.base.type.length; > - unsigned num_quads = length / 4; > unsigned dim; > - unsigned quad; > - > for (dim = 0; dim < dims; ++dim) { > - LLVMValueRef srcx = lp_build_emit_fetch( &bld->bld_base, inst, 3, > dim ); > - LLVMValueRef srcy = lp_build_emit_fetch( &bld->bld_base, inst, 4, > dim ); > - for (quad = 0; quad < num_quads; ++quad) { > - unsigned s1 = 4*quad; > - unsigned s2 = 4*quad + length; > - shuffles[4*quad + 0] = lp_build_const_int32(gallivm, s1); > - shuffles[4*quad + 1] = lp_build_const_int32(gallivm, s2); > - shuffles[4*quad + 2] = i32undef; > - shuffles[4*quad + 3] = i32undef; > - } > - ddxdyonec[dim] = LLVMBuildShuffleVector(builder, srcx, srcy, > - LLVMConstVector(shuffles, > length), ""); > - } > - if (dims == 1) { > - derivs.ddx_ddy[0] = ddxdyonec[0]; > - } > - else if (dims >= 2) { > - for (quad = 0; quad < num_quads; ++quad) { > - unsigned s1 = 4*quad; > 
- unsigned s2 = 4*quad + length; > - shuffles[4*quad + 0] = lp_build_const_int32(gallivm, s1); > - shuffles[4*quad + 1] = lp_build_const_int32(gallivm, s1 + 1); > - shuffles[4*quad + 2] = lp_build_const_int32(gallivm, s2); > - shuffles[4*quad + 3] = lp_build_const_int32(gallivm, s2 + 1); > - } > - derivs.ddx_ddy[0] = LLVMBuildShuffleVector(builder, ddxdyonec[0], > ddxdyonec[1], > - LLVMConstVector(shuffles, > length), ""); > - if (dims == 3) { > - derivs.ddx_ddy[1] = ddxdyonec[2]; > - } > - } > - } else { > - if (dims == 1) { > - derivs.ddx_ddy[0] = > lp_build_packed_ddx_ddy_onecoord(&bld->bld_base.base, coords[0]); > - } > - else if (dims >= 2) { > - derivs.ddx_ddy[0] = > lp_build_packed_ddx_ddy_twocoord(&bld->bld_base.base, > - coords[0], > coords[1]); > - if (dims == 3) { > - derivs.ddx_ddy[1] = > lp_build_packed_ddx_ddy_onecoord(&bld->bld_base.base, coords[2]); > - } > + derivs.ddx[dim] = lp_build_emit_fetch( &bld->bld_base, inst, 3, dim > ); > + derivs.ddy[dim] = lp_build_emit_fetch( &bld->bld_base, inst, 4, dim > ); > } > + deriv_ptr = &derivs; > } > > /* some advanced gather instructions (txgo) would require 4 offsets */ > @@ -1517,7 +1421,7 @@ emit_sample(struct lp_build_tgsi_soa_context *bld, > texture_unit, sampler_unit, > coords, > offsets, > - &derivs, > + deriv_ptr, > lod_bias, explicit_lod, > texel); > } > @@ -1533,7 +1437,6 @@ emit_fetch_texels( struct lp_build_tgsi_soa_context > *bld, > LLVMValueRef explicit_lod = NULL; > LLVMValueRef coords[3]; > LLVMValueRef offsets[3] = { NULL }; > - struct lp_derivatives derivs; > unsigned num_coords; > unsigned dims; > unsigned i; > @@ -1548,9 +1451,6 @@ emit_fetch_texels( struct lp_build_tgsi_soa_context > *bld, > > unit = inst->Src[1].Register.Index; > > - derivs.ddx_ddy[0] = coord_undef; > - derivs.ddx_ddy[1] = coord_undef; > - > if (is_samplei) { > target = bld->sv[unit].Resource; > } > @@ -1612,7 +1512,7 @@ emit_fetch_texels( struct lp_build_tgsi_soa_context > *bld, > unit, unit, > coords, > offsets, > - 
&derivs, > + NULL, > NULL, explicit_lod, > texel); > } > -- > 1.7.9.5 > _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev