Re: [Mesa-dev] [PATCH] gallivm: clean up passing derivatives around

Jose Fonseca Mon, 11 Mar 2013 09:21:24 -0700

I only skimmed it, but it looks good in principle.

Jose


----- Original Message -----
> From: Roland Scheidegger <srol...@vmware.com>
> 
> Previously, the derivatives were calculated and passed in a packed form
> to the sample code (for implicit derivatives, explicit derivatives were
> packed to the same format).
> There are several reasons why this wasn't such a good idea:
> 1) the derivatives may not even be needed (not as bad as it sounds since
> llvm will just throw the calculations needed for them away but still)
> 2) the special packing format really shouldn't be part of the sampler
> interface
> 3) depending on what the sample code actually does the derivatives will
> be processed differently, hence there is no "ideal" packing. For cube
> maps with explicit derivatives (which we don't do yet) for instance the
> packing looked downright useless, and for non-isotropic filtering we'd
> need different calculations too.
> 
> So, instead just pass the derivatives as is (for explicit derivatives),
> or let the rho calculating sample code calculate them itself. This still
> does exactly the same packing stuff for implicit derivatives for now,
> though explicit ones are handled in a more straightforward manner (quick
> estimates show performance should be quite similar, though it is much
> easier to follow and also does the rho calculation per-pixel until the
> end, which we eventually need for spec compliance anyway).
> 
> No piglit changes.
> ---
>  src/gallium/auxiliary/gallivm/lp_bld_quad.c       |   14 +-
>  src/gallium/auxiliary/gallivm/lp_bld_sample.c     |  271 +++++++++++++--------
>  src/gallium/auxiliary/gallivm/lp_bld_sample.h     |    6 +-
>  src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c |   11 +-
>  src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c   |   21 +-
>  src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c   |  122 +---------
>  6 files changed, 196 insertions(+), 249 deletions(-)
> 
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_quad.c
> b/src/gallium/auxiliary/gallivm/lp_bld_quad.c
> index 8a0efed..1955add 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_quad.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_quad.c
> @@ -79,14 +79,9 @@ lp_build_ddy(struct lp_build_context *bld,
>  }
>  
>  /*
> - * To be able to handle multiple quads at once in texture sampling and
> - * do lod calculations per quad, it is necessary to get the per-quad
> - * derivatives into the lp_build_rho function.
> - * For 8-wide vectors the packed derivative values for 3 coords would
> - * look like this, this scales to a arbitrary (multiple of 4) vector size:
> - * ds1dx ds1dy dt1dx dt1dy ds2dx ds2dy dt2dx dt2dy
> + * Helper for building packed ddx/ddy vector for one coord (scalar per quad
> + * values). The vector will look like this (8-wide):
>   * dr1dx dr1dy _____ _____ dr2dx dr2dy _____ _____
> - * The second vector will be unused for 1d and 2d textures.
>   */
>  LLVMValueRef
>  lp_build_packed_ddx_ddy_onecoord(struct lp_build_context *bld,
> @@ -121,6 +116,11 @@ lp_build_packed_ddx_ddy_onecoord(struct lp_build_context
> *bld,
>  }
>  
>  
> +/*
> + * Helper for building packed ddx/ddy vector for two coords (scalar per quad
> + * values). The vector will look like this (8-wide):
> + * ds1dx ds1dy dt1dx dt1dy ds2dx ds2dy dt2dx dt2dy
> + */
>  LLVMValueRef
>  lp_build_packed_ddx_ddy_twocoord(struct lp_build_context *bld,
>                                   LLVMValueRef a, LLVMValueRef b)
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c
> b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
> index ef0631c..fc8bae7 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
> @@ -46,6 +46,7 @@
>  #include "lp_bld_type.h"
>  #include "lp_bld_logic.h"
>  #include "lp_bld_pack.h"
> +#include "lp_bld_quad.h"
>  
>  
>  /*
> @@ -203,6 +204,9 @@ lp_sampler_static_sampler_state(struct
> lp_static_sampler_state *state,
>  static LLVMValueRef
>  lp_build_rho(struct lp_build_sample_context *bld,
>               unsigned texture_unit,
> +             LLVMValueRef s,
> +             LLVMValueRef t,
> +             LLVMValueRef r,
>               const struct lp_derivatives *derivs)
>  {
>     struct gallivm_state *gallivm = bld->gallivm;
> @@ -211,8 +215,8 @@ lp_build_rho(struct lp_build_sample_context *bld,
>     struct lp_build_context *float_bld = &bld->float_bld;
>     struct lp_build_context *coord_bld = &bld->coord_bld;
>     struct lp_build_context *perquadf_bld = &bld->perquadf_bld;
> -   const LLVMValueRef *ddx_ddy = derivs->ddx_ddy;
>     const unsigned dims = bld->dims;
> +   LLVMValueRef ddx_ddy[2];
>     LLVMBuilderRef builder = bld->gallivm->builder;
>     LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context);
>     LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0);
> @@ -229,59 +233,7 @@ lp_build_rho(struct lp_build_sample_context *bld,
>     LLVMValueRef i32undef =
>     LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
>     LLVMValueRef rho_xvec, rho_yvec;
>  
> -   abs_ddx_ddy[0] = lp_build_abs(coord_bld, ddx_ddy[0]);
> -   if (dims > 2) {
> -      abs_ddx_ddy[1] = lp_build_abs(coord_bld, ddx_ddy[1]);
> -   }
> -   else {
> -      abs_ddx_ddy[1] = NULL;
> -   }
> -
> -   if (dims == 1) {
> -      static const unsigned char swizzle1[] = {
> -         0, LP_BLD_SWIZZLE_DONTCARE,
> -         LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
> -      };
> -      static const unsigned char swizzle2[] = {
> -         1, LP_BLD_SWIZZLE_DONTCARE,
> -         LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
> -      };
> -      rho_xvec = lp_build_swizzle_aos(coord_bld, abs_ddx_ddy[0], swizzle1);
> -      rho_yvec = lp_build_swizzle_aos(coord_bld, abs_ddx_ddy[0], swizzle2);
> -   }
> -   else if (dims == 2) {
> -      static const unsigned char swizzle1[] = {
> -         0, 2,
> -         LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
> -      };
> -      static const unsigned char swizzle2[] = {
> -         1, 3,
> -         LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
> -      };
> -      rho_xvec = lp_build_swizzle_aos(coord_bld, abs_ddx_ddy[0], swizzle1);
> -      rho_yvec = lp_build_swizzle_aos(coord_bld, abs_ddx_ddy[0], swizzle2);
> -   }
> -   else {
> -      LLVMValueRef shuffles1[LP_MAX_VECTOR_LENGTH];
> -      LLVMValueRef shuffles2[LP_MAX_VECTOR_LENGTH];
> -      assert(dims == 3);
> -      for (i = 0; i < num_quads; i++) {
> -         shuffles1[4*i + 0] = lp_build_const_int32(gallivm, 4*i);
> -         shuffles1[4*i + 1] = lp_build_const_int32(gallivm, 4*i + 2);
> -         shuffles1[4*i + 2] = lp_build_const_int32(gallivm, length + 4*i);
> -         shuffles1[4*i + 3] = i32undef;
> -         shuffles2[4*i + 0] = lp_build_const_int32(gallivm, 4*i + 1);
> -         shuffles2[4*i + 1] = lp_build_const_int32(gallivm, 4*i + 3);
> -         shuffles2[4*i + 2] = lp_build_const_int32(gallivm, length + 4*i +
> 1);
> -         shuffles2[4*i + 3] = i32undef;
> -      }
> -      rho_xvec = LLVMBuildShuffleVector(builder, abs_ddx_ddy[0],
> abs_ddx_ddy[1],
> -                                        LLVMConstVector(shuffles1, length),
> "");
> -      rho_yvec = LLVMBuildShuffleVector(builder, abs_ddx_ddy[0],
> abs_ddx_ddy[1],
> -                                        LLVMConstVector(shuffles2, length),
> "");
> -   }
> -
> -   rho_vec = lp_build_max(coord_bld, rho_xvec, rho_yvec);
> +   /* Note that all simplified calculations will only work for isotropic
> filtering */
>  
>     first_level = bld->dynamic_state->first_level(bld->dynamic_state,
>                                                   bld->gallivm,
>                                                   texture_unit);
> @@ -289,76 +241,176 @@ lp_build_rho(struct lp_build_sample_context *bld,
>     int_size = lp_build_minify(int_size_bld, bld->int_size, first_level_vec);
>     float_size = lp_build_int_to_float(float_size_bld, int_size);
>  
> -   if (bld->coord_type.length > 4) {
> -      /* expand size to each quad */
> +   /* XXX ignoring explicit derivs for cube maps for now */
> +   if (derivs && !(bld->static_texture_state->target == PIPE_TEXTURE_CUBE))
> {
> +      LLVMValueRef ddmax[3];
> +      for (i = 0; i < dims; i++) {
> +         LLVMValueRef ddx, ddy;
> +         LLVMValueRef floatdim;
> +         LLVMValueRef indexi = lp_build_const_int32(gallivm, i);
> +         ddx = lp_build_abs(coord_bld, derivs->ddx[i]);
> +         ddy = lp_build_abs(coord_bld, derivs->ddy[i]);
> +         ddmax[i] = lp_build_max(coord_bld, ddx, ddy);
> +         floatdim = lp_build_extract_broadcast(gallivm,
> bld->float_size_in_type,
> +                                               coord_bld->type, float_size,
> indexi);
> +         ddmax[i] = lp_build_mul(coord_bld, floatdim, ddmax[i]);
> +      }
> +      rho_vec = ddmax[0];
>        if (dims > 1) {
> -         /* could use some broadcast_vector helper for this? */
> -         int num_quads = bld->coord_type.length / 4;
> -         LLVMValueRef src[LP_MAX_VECTOR_LENGTH/4];
> -         for (i = 0; i < num_quads; i++) {
> -            src[i] = float_size;
> +         rho_vec = lp_build_max(coord_bld, rho_vec, ddmax[1]);
> +         if (dims > 2) {
> +            rho_vec = lp_build_max(coord_bld, rho_vec, ddmax[2]);
> +         }
> +      }
> +      /*
> +       * rho_vec now still contains per-pixel rho, convert to scalar per
> quad
> +       * since we can't handle per-pixel rho/lod from now on (TODO).
> +       */
> +      rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
> +                                      perquadf_bld->type, rho_vec, 0);
> +   }
> +   else {
> +      /*
> +       * This looks all a bit complex, but it's not that bad
> +       * (the shuffle code makes it look worse than it is).
> +       * Still, might not be ideal for all cases.
> +       */
> +      if (dims < 2) {
> +         ddx_ddy[0] = lp_build_packed_ddx_ddy_onecoord(coord_bld, s);
> +      }
> +      else if (dims >= 2) {
> +         ddx_ddy[0] = lp_build_packed_ddx_ddy_twocoord(coord_bld,
> +                                                       s, t);
> +         if (dims > 2) {
> +            ddx_ddy[1] = lp_build_packed_ddx_ddy_onecoord(coord_bld, r);
>           }
> -         float_size = lp_build_concat(bld->gallivm, src,
> float_size_bld->type, num_quads);
> +      }
> +
> +      abs_ddx_ddy[0] = lp_build_abs(coord_bld, ddx_ddy[0]);
> +      if (dims > 2) {
> +         abs_ddx_ddy[1] = lp_build_abs(coord_bld, ddx_ddy[1]);
>        }
>        else {
> -         float_size = lp_build_broadcast_scalar(coord_bld, float_size);
> +         abs_ddx_ddy[1] = NULL;
>        }
> -      rho_vec = lp_build_mul(coord_bld, rho_vec, float_size);
>  
> -      if (dims <= 1) {
> -         rho = rho_vec;
> +      if (dims == 1) {
> +         static const unsigned char swizzle1[] = {
> +            0, LP_BLD_SWIZZLE_DONTCARE,
> +            LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
> +         };
> +         static const unsigned char swizzle2[] = {
> +            1, LP_BLD_SWIZZLE_DONTCARE,
> +            LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
> +         };
> +         rho_xvec = lp_build_swizzle_aos(coord_bld, abs_ddx_ddy[0],
> swizzle1);
> +         rho_yvec = lp_build_swizzle_aos(coord_bld, abs_ddx_ddy[0],
> swizzle2);
> +      }
> +      else if (dims == 2) {
> +         static const unsigned char swizzle1[] = {
> +            0, 2,
> +            LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
> +         };
> +         static const unsigned char swizzle2[] = {
> +            1, 3,
> +            LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
> +         };
> +         rho_xvec = lp_build_swizzle_aos(coord_bld, abs_ddx_ddy[0],
> swizzle1);
> +         rho_yvec = lp_build_swizzle_aos(coord_bld, abs_ddx_ddy[0],
> swizzle2);
>        }
>        else {
> -         if (dims >= 2) {
> -            static const unsigned char swizzle1[] = {
> -               0, LP_BLD_SWIZZLE_DONTCARE,
> -               LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
> -            };
> -            static const unsigned char swizzle2[] = {
> -               1, LP_BLD_SWIZZLE_DONTCARE,
> -               LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
> -            };
> -            LLVMValueRef rho_s, rho_t, rho_r;
> -
> -            rho_s = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle1);
> -            rho_t = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle2);
> -
> -            rho = lp_build_max(coord_bld, rho_s, rho_t);
> -
> -            if (dims >= 3) {
> -               static const unsigned char swizzle3[] = {
> -                  2, LP_BLD_SWIZZLE_DONTCARE,
> +         LLVMValueRef shuffles1[LP_MAX_VECTOR_LENGTH];
> +         LLVMValueRef shuffles2[LP_MAX_VECTOR_LENGTH];
> +         assert(dims == 3);
> +         for (i = 0; i < num_quads; i++) {
> +            shuffles1[4*i + 0] = lp_build_const_int32(gallivm, 4*i);
> +            shuffles1[4*i + 1] = lp_build_const_int32(gallivm, 4*i + 2);
> +            shuffles1[4*i + 2] = lp_build_const_int32(gallivm, length +
> 4*i);
> +            shuffles1[4*i + 3] = i32undef;
> +            shuffles2[4*i + 0] = lp_build_const_int32(gallivm, 4*i + 1);
> +            shuffles2[4*i + 1] = lp_build_const_int32(gallivm, 4*i + 3);
> +            shuffles2[4*i + 2] = lp_build_const_int32(gallivm, length + 4*i
> + 1);
> +            shuffles2[4*i + 3] = i32undef;
> +         }
> +         rho_xvec = LLVMBuildShuffleVector(builder, abs_ddx_ddy[0],
> abs_ddx_ddy[1],
> +                                           LLVMConstVector(shuffles1,
> length), "");
> +         rho_yvec = LLVMBuildShuffleVector(builder, abs_ddx_ddy[0],
> abs_ddx_ddy[1],
> +                                           LLVMConstVector(shuffles2,
> length), "");
> +      }
> +
> +      rho_vec = lp_build_max(coord_bld, rho_xvec, rho_yvec);
> +
> +      if (bld->coord_type.length > 4) {
> +         /* expand size to each quad */
> +         if (dims > 1) {
> +            /* could use some broadcast_vector helper for this? */
> +            int num_quads = bld->coord_type.length / 4;
> +            LLVMValueRef src[LP_MAX_VECTOR_LENGTH/4];
> +            for (i = 0; i < num_quads; i++) {
> +               src[i] = float_size;
> +            }
> +            float_size = lp_build_concat(bld->gallivm, src,
> float_size_bld->type, num_quads);
> +         }
> +         else {
> +            float_size = lp_build_broadcast_scalar(coord_bld, float_size);
> +         }
> +         rho_vec = lp_build_mul(coord_bld, rho_vec, float_size);
> +
> +         if (dims <= 1) {
> +            rho = rho_vec;
> +         }
> +         else {
> +            if (dims >= 2) {
> +               static const unsigned char swizzle1[] = {
> +                  0, LP_BLD_SWIZZLE_DONTCARE,
>                    LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
>                 };
> -               rho_r = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle3);
> -               rho = lp_build_max(coord_bld, rho, rho_r);
> +               static const unsigned char swizzle2[] = {
> +                  1, LP_BLD_SWIZZLE_DONTCARE,
> +                  LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
> +               };
> +               LLVMValueRef rho_s, rho_t, rho_r;
> +
> +               rho_s = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle1);
> +               rho_t = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle2);
> +
> +               rho = lp_build_max(coord_bld, rho_s, rho_t);
> +
> +               if (dims >= 3) {
> +                  static const unsigned char swizzle3[] = {
> +                     2, LP_BLD_SWIZZLE_DONTCARE,
> +                     LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
> +                  };
> +                  rho_r = lp_build_swizzle_aos(coord_bld, rho_vec,
> swizzle3);
> +                  rho = lp_build_max(coord_bld, rho, rho_r);
> +               }
>              }
>           }
> -      }
> -      rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
> -                                      perquadf_bld->type, rho, 0);
> -   }
> -   else {
> -      if (dims <= 1) {
> -         rho_vec = LLVMBuildExtractElement(builder, rho_vec, index0, "");
> -      }
> -      rho_vec = lp_build_mul(float_size_bld, rho_vec, float_size);
> -
> -      if (dims <= 1) {
> -         rho = rho_vec;
> +         rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
> +                                         perquadf_bld->type, rho, 0);
>        }
>        else {
> -         if (dims >= 2) {
> -            LLVMValueRef rho_s, rho_t, rho_r;
> +         if (dims <= 1) {
> +            rho_vec = LLVMBuildExtractElement(builder, rho_vec, index0, "");
> +         }
> +         rho_vec = lp_build_mul(float_size_bld, rho_vec, float_size);
> +
> +         if (dims <= 1) {
> +            rho = rho_vec;
> +         }
> +         else {
> +            if (dims >= 2) {
> +               LLVMValueRef rho_s, rho_t, rho_r;
>  
> -            rho_s = LLVMBuildExtractElement(builder, rho_vec, index0, "");
> -            rho_t = LLVMBuildExtractElement(builder, rho_vec, index1, "");
> +               rho_s = LLVMBuildExtractElement(builder, rho_vec, index0,
> "");
> +               rho_t = LLVMBuildExtractElement(builder, rho_vec, index1,
> "");
>  
> -            rho = lp_build_max(float_bld, rho_s, rho_t);
> +               rho = lp_build_max(float_bld, rho_s, rho_t);
>  
> -            if (dims >= 3) {
> -               rho_r = LLVMBuildExtractElement(builder, rho_vec, index2,
> "");
> -               rho = lp_build_max(float_bld, rho, rho_r);
> +               if (dims >= 3) {
> +                  rho_r = LLVMBuildExtractElement(builder, rho_vec, index2,
> "");
> +                  rho = lp_build_max(float_bld, rho, rho_r);
> +               }
>              }
>           }
>        }
> @@ -511,6 +563,9 @@ void
>  lp_build_lod_selector(struct lp_build_sample_context *bld,
>                        unsigned texture_unit,
>                        unsigned sampler_unit,
> +                      LLVMValueRef s,
> +                      LLVMValueRef t,
> +                      LLVMValueRef r,
>                        const struct lp_derivatives *derivs,
>                        LLVMValueRef lod_bias, /* optional */
>                        LLVMValueRef explicit_lod, /* optional */
> @@ -544,7 +599,7 @@ lp_build_lod_selector(struct lp_build_sample_context
> *bld,
>        else {
>           LLVMValueRef rho;
>  
> -         rho = lp_build_rho(bld, texture_unit, derivs);
> +         rho = lp_build_rho(bld, texture_unit, s, t, r, derivs);
>  
>           /*
>            * Compute lod = log2(rho)
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.h
> b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
> index 6306455..1abe0ca 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.h
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
> @@ -56,7 +56,8 @@ struct lp_build_context;
>   */
>  struct lp_derivatives
>  {
> -   LLVMValueRef ddx_ddy[2];
> +   LLVMValueRef ddx[3];
> +   LLVMValueRef ddy[3];
>  };
>  
>  
> @@ -366,6 +367,9 @@ void
>  lp_build_lod_selector(struct lp_build_sample_context *bld,
>                        unsigned texture_index,
>                        unsigned sampler_index,
> +                      LLVMValueRef s,
> +                      LLVMValueRef t,
> +                      LLVMValueRef r,
>                        const struct lp_derivatives *derivs,
>                        LLVMValueRef lod_bias, /* optional */
>                        LLVMValueRef explicit_lod, /* optional */
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
> b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
> index 8aa4166..cdd910f 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
> @@ -1077,7 +1077,7 @@ lp_build_sample_common(struct lp_build_sample_context
> *bld,
>                         LLVMValueRef *s,
>                         LLVMValueRef *t,
>                         LLVMValueRef *r,
> -                       const struct lp_derivatives *derivs,
> +                       const struct lp_derivatives *derivs, /* optional */
>                         LLVMValueRef lod_bias, /* optional */
>                         LLVMValueRef explicit_lod, /* optional */
>                         LLVMValueRef *lod_ipart,
> @@ -1090,7 +1090,6 @@ lp_build_sample_common(struct lp_build_sample_context
> *bld,
>     const unsigned mag_filter = bld->static_sampler_state->mag_img_filter;
>     const unsigned target = bld->static_texture_state->target;
>     LLVMValueRef first_level;
> -   struct lp_derivatives face_derivs;
>  
>     /*
>     printf("%s mip %d  min %d  mag %d\n", __FUNCTION__,
> @@ -1107,11 +1106,6 @@ lp_build_sample_common(struct lp_build_sample_context
> *bld,
>        *t = face_t; /* vec */
>        /* use 'r' to indicate cube face */
>        *r = face; /* vec */
> -
> -      /* recompute ddx, ddy using the new (s,t) face texcoords */
> -      face_derivs.ddx_ddy[0] =
> lp_build_packed_ddx_ddy_twocoord(&bld->coord_bld, *s, *t);
> -      face_derivs.ddx_ddy[1] = NULL;
> -      derivs = &face_derivs;
>     }
>     else if (target == PIPE_TEXTURE_1D_ARRAY) {
>        *r = lp_build_iround(&bld->coord_bld, *t);
> @@ -1131,6 +1125,7 @@ lp_build_sample_common(struct lp_build_sample_context
> *bld,
>         * distinguish between minification/magnification with one mipmap
>         level.
>         */
>        lp_build_lod_selector(bld, texture_index, sampler_index,
> +                            *s, *t, *r,
>                              derivs, lod_bias, explicit_lod,
>                              mip_filter,
>                              lod_ipart, lod_fpart);
> @@ -1479,7 +1474,7 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
>                      unsigned sampler_index,
>                      const LLVMValueRef *coords,
>                      const LLVMValueRef *offsets,
> -                    const struct lp_derivatives *derivs,
> +                    const struct lp_derivatives *derivs, /* optional */
>                      LLVMValueRef lod_bias, /* optional */
>                      LLVMValueRef explicit_lod, /* optional */
>                      LLVMValueRef texel_out[4])
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
> b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
> index 9a30cc8..98bce0e 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
> @@ -363,7 +363,7 @@ emit_tex(struct lp_build_tgsi_aos_context *bld,
>     unsigned target;
>     unsigned unit;
>     LLVMValueRef coords;
> -   struct lp_derivatives derivs;
> +   struct lp_derivatives derivs = { {NULL}, {NULL} };
>  
>     if (!bld->sampler) {
>        _debug_printf("warning: found texture instruction but no sampler
>        generator supplied\n");
> @@ -374,22 +374,15 @@ emit_tex(struct lp_build_tgsi_aos_context *bld,
>  
>     coords = lp_build_emit_fetch( &bld->bld_base, inst, 0 , LP_CHAN_ALL);
>  
> -   if (0 && modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
> -      lp_build_emit_fetch( &bld->bld_base, inst, 1 , LP_CHAN_ALL);
> -      lp_build_emit_fetch( &bld->bld_base, inst, 2 , LP_CHAN_ALL);
> +   if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
> +      /* probably not going to work */
> +      derivs.ddx[0] = lp_build_emit_fetch( &bld->bld_base, inst, 1 ,
> LP_CHAN_ALL);
> +      derivs.ddy[0] = lp_build_emit_fetch( &bld->bld_base, inst, 2 ,
> LP_CHAN_ALL);
>        unit = inst->Src[3].Register.Index;
> -   }  else {
> -#if 0
> -      ddx = lp_build_ddx( &bld->bld_base.base, coords );
> -      ddy = lp_build_ddy( &bld->bld_base.base, coords );
> -#else
> -      /* TODO */
> -      derivs.ddx_ddy[0] = bld->bld_base.base.one;
> -      derivs.ddx_ddy[1] = bld->bld_base.base.one;
> -#endif
> +   }
> +   else {
>        unit = inst->Src[1].Register.Index;
>     }
> -
>     return bld->sampler->emit_fetch_texel(bld->sampler,
>                                           &bld->bld_base.base,
>                                           target, unit,
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
> b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
> index 69957fe..9fe87c4 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
> @@ -1164,14 +1164,13 @@ emit_tex( struct lp_build_tgsi_soa_context *bld,
>            enum lp_build_tex_modifier modifier,
>            LLVMValueRef *texel)
>  {
> -   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
> -   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
>     unsigned unit;
>     LLVMValueRef lod_bias, explicit_lod;
>     LLVMValueRef oow = NULL;
>     LLVMValueRef coords[4];
>     LLVMValueRef offsets[3] = { NULL };
>     struct lp_derivatives derivs;
> +   struct lp_derivatives *deriv_ptr = NULL;
>     unsigned num_coords;
>     unsigned dims;
>     unsigned i;
> @@ -1184,9 +1183,6 @@ emit_tex( struct lp_build_tgsi_soa_context *bld,
>        return;
>     }
>  
> -   derivs.ddx_ddy[0] = bld->bld_base.base.undef;
> -   derivs.ddx_ddy[1] = bld->bld_base.base.undef;
> -
>     switch (inst->Texture.Texture) {
>     case TGSI_TEXTURE_1D:
>        num_coords = 1;
> @@ -1259,58 +1255,14 @@ emit_tex( struct lp_build_tgsi_soa_context *bld,
>     }
>  
>     if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
> -      LLVMValueRef i32undef =
> LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
> -      LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
> -      LLVMValueRef ddxdyonec[3];
> -      unsigned length = bld->bld_base.base.type.length;
> -      unsigned num_quads = length / 4;
>        unsigned dim;
> -      unsigned quad;
> -
>        for (dim = 0; dim < dims; ++dim) {
> -         LLVMValueRef srcx = lp_build_emit_fetch( &bld->bld_base, inst, 1,
> dim );
> -         LLVMValueRef srcy = lp_build_emit_fetch( &bld->bld_base, inst, 2,
> dim );
> -         for (quad = 0; quad < num_quads; ++quad) {
> -            unsigned s1 = 4*quad;
> -            unsigned s2 = 4*quad + length;
> -            shuffles[4*quad + 0] = lp_build_const_int32(gallivm, s1);
> -            shuffles[4*quad + 1] = lp_build_const_int32(gallivm, s2);
> -            shuffles[4*quad + 2] = i32undef;
> -            shuffles[4*quad + 3] = i32undef;
> -         }
> -         ddxdyonec[dim] = LLVMBuildShuffleVector(builder, srcx, srcy,
> -                                               LLVMConstVector(shuffles,
> length), "");
> -      }
> -      if (dims == 1) {
> -         derivs.ddx_ddy[0] = ddxdyonec[0];
> -      }
> -      else if (dims >= 2) {
> -         for (quad = 0; quad < num_quads; ++quad) {
> -            unsigned s1 = 4*quad;
> -            unsigned s2 = 4*quad + length;
> -            shuffles[4*quad + 0] = lp_build_const_int32(gallivm, s1);
> -            shuffles[4*quad + 1] = lp_build_const_int32(gallivm, s1 + 1);
> -            shuffles[4*quad + 2] = lp_build_const_int32(gallivm, s2);
> -            shuffles[4*quad + 3] = lp_build_const_int32(gallivm, s2 + 1);
> -         }
> -         derivs.ddx_ddy[0] = LLVMBuildShuffleVector(builder, ddxdyonec[0],
> ddxdyonec[1],
> -                                                  LLVMConstVector(shuffles,
> length), "");
> -         if (dims == 3) {
> -            derivs.ddx_ddy[1] = ddxdyonec[2];
> -         }
> +         derivs.ddx[dim] = lp_build_emit_fetch( &bld->bld_base, inst, 1, dim
> );
> +         derivs.ddy[dim] = lp_build_emit_fetch( &bld->bld_base, inst, 2, dim
> );
>        }
> +      deriv_ptr = &derivs;
>        unit = inst->Src[3].Register.Index;
>     }  else {
> -      if (dims == 1) {
> -         derivs.ddx_ddy[0] =
> lp_build_packed_ddx_ddy_onecoord(&bld->bld_base.base, coords[0]);
> -      }
> -      else if (dims >= 2) {
> -         derivs.ddx_ddy[0] =
> lp_build_packed_ddx_ddy_twocoord(&bld->bld_base.base,
> -                                                            coords[0],
> coords[1]);
> -         if (dims == 3) {
> -            derivs.ddx_ddy[1] =
> lp_build_packed_ddx_ddy_onecoord(&bld->bld_base.base, coords[2]);
> -         }
> -      }
>        unit = inst->Src[1].Register.Index;
>     }
>  
> @@ -1329,7 +1281,7 @@ emit_tex( struct lp_build_tgsi_soa_context *bld,
>                                    unit, unit,
>                                    coords,
>                                    offsets,
> -                                  &derivs,
> +                                  deriv_ptr,
>                                    lod_bias, explicit_lod,
>                                    texel);
>  }
> @@ -1341,13 +1293,13 @@ emit_sample(struct lp_build_tgsi_soa_context *bld,
>              boolean compare,
>              LLVMValueRef *texel)
>  {
> -   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
>     struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
>     unsigned texture_unit, sampler_unit;
>     LLVMValueRef lod_bias, explicit_lod;
>     LLVMValueRef coords[4];
>     LLVMValueRef offsets[3] = { NULL };
>     struct lp_derivatives derivs;
> +   struct lp_derivatives *deriv_ptr = NULL;
>     unsigned num_coords, dims;
>     unsigned i;
>  
> @@ -1366,9 +1318,6 @@ emit_sample(struct lp_build_tgsi_soa_context *bld,
>     texture_unit = inst->Src[1].Register.Index;
>     sampler_unit = inst->Src[2].Register.Index;
>  
> -   derivs.ddx_ddy[0] = bld->bld_base.base.undef;
> -   derivs.ddx_ddy[1] = bld->bld_base.base.undef;
> -
>     /*
>      * Note inst->Texture.Texture will contain the number of offsets,
>      * however the target information is NOT there and comes from the
> @@ -1449,57 +1398,12 @@ emit_sample(struct lp_build_tgsi_soa_context *bld,
>     }
>  
>     if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
> -      LLVMValueRef i32undef =
> LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
> -      LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
> -      LLVMValueRef ddxdyonec[3];
> -      unsigned length = bld->bld_base.base.type.length;
> -      unsigned num_quads = length / 4;
>        unsigned dim;
> -      unsigned quad;
> -
>        for (dim = 0; dim < dims; ++dim) {
> -         LLVMValueRef srcx = lp_build_emit_fetch( &bld->bld_base, inst, 3,
> dim );
> -         LLVMValueRef srcy = lp_build_emit_fetch( &bld->bld_base, inst, 4,
> dim );
> -         for (quad = 0; quad < num_quads; ++quad) {
> -            unsigned s1 = 4*quad;
> -            unsigned s2 = 4*quad + length;
> -            shuffles[4*quad + 0] = lp_build_const_int32(gallivm, s1);
> -            shuffles[4*quad + 1] = lp_build_const_int32(gallivm, s2);
> -            shuffles[4*quad + 2] = i32undef;
> -            shuffles[4*quad + 3] = i32undef;
> -         }
> -         ddxdyonec[dim] = LLVMBuildShuffleVector(builder, srcx, srcy,
> -                                               LLVMConstVector(shuffles,
> length), "");
> -      }
> -      if (dims == 1) {
> -         derivs.ddx_ddy[0] = ddxdyonec[0];
> -      }
> -      else if (dims >= 2) {
> -         for (quad = 0; quad < num_quads; ++quad) {
> -            unsigned s1 = 4*quad;
> -            unsigned s2 = 4*quad + length;
> -            shuffles[4*quad + 0] = lp_build_const_int32(gallivm, s1);
> -            shuffles[4*quad + 1] = lp_build_const_int32(gallivm, s1 + 1);
> -            shuffles[4*quad + 2] = lp_build_const_int32(gallivm, s2);
> -            shuffles[4*quad + 3] = lp_build_const_int32(gallivm, s2 + 1);
> -         }
> -         derivs.ddx_ddy[0] = LLVMBuildShuffleVector(builder, ddxdyonec[0],
> ddxdyonec[1],
> -                                                  LLVMConstVector(shuffles,
> length), "");
> -         if (dims == 3) {
> -            derivs.ddx_ddy[1] = ddxdyonec[2];
> -         }
> -      }
> -   }  else {
> -      if (dims == 1) {
> -         derivs.ddx_ddy[0] =
> lp_build_packed_ddx_ddy_onecoord(&bld->bld_base.base, coords[0]);
> -      }
> -      else if (dims >= 2) {
> -         derivs.ddx_ddy[0] =
> lp_build_packed_ddx_ddy_twocoord(&bld->bld_base.base,
> -                                                            coords[0],
> coords[1]);
> -         if (dims == 3) {
> -            derivs.ddx_ddy[1] =
> lp_build_packed_ddx_ddy_onecoord(&bld->bld_base.base, coords[2]);
> -         }
> +         derivs.ddx[dim] = lp_build_emit_fetch( &bld->bld_base, inst, 3, dim
> );
> +         derivs.ddy[dim] = lp_build_emit_fetch( &bld->bld_base, inst, 4, dim
> );
>        }
> +      deriv_ptr = &derivs;
>     }
>  
>     /* some advanced gather instructions (txgo) would require 4 offsets */
> @@ -1517,7 +1421,7 @@ emit_sample(struct lp_build_tgsi_soa_context *bld,
>                                    texture_unit, sampler_unit,
>                                    coords,
>                                    offsets,
> -                                  &derivs,
> +                                  deriv_ptr,
>                                    lod_bias, explicit_lod,
>                                    texel);
>  }
> @@ -1533,7 +1437,6 @@ emit_fetch_texels( struct lp_build_tgsi_soa_context
> *bld,
>     LLVMValueRef explicit_lod = NULL;
>     LLVMValueRef coords[3];
>     LLVMValueRef offsets[3] = { NULL };
> -   struct lp_derivatives derivs;
>     unsigned num_coords;
>     unsigned dims;
>     unsigned i;
> @@ -1548,9 +1451,6 @@ emit_fetch_texels( struct lp_build_tgsi_soa_context
> *bld,
>  
>     unit = inst->Src[1].Register.Index;
>  
> -   derivs.ddx_ddy[0] = coord_undef;
> -   derivs.ddx_ddy[1] = coord_undef;
> -
>     if (is_samplei) {
>        target = bld->sv[unit].Resource;
>     }
> @@ -1612,7 +1512,7 @@ emit_fetch_texels( struct lp_build_tgsi_soa_context
> *bld,
>                                    unit, unit,
>                                    coords,
>                                    offsets,
> -                                  &derivs,
> +                                  NULL,
>                                    NULL, explicit_lod,
>                                    texel);
>  }
> --
> 1.7.9.5
> 
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] gallivm: clean up passing derivatives around

Reply via email to