On 12/06/2012 06:20 AM, jfons...@vmware.com wrote:
From: José Fonseca <jfons...@vmware.com>

Several issues acutally:

"actually"



- Fix a regression in unsigned normalized in the rescaling
   [0, 255] to [0, 256]

- Ensure we use signed shifts where appropriate (instead of
   unsigned shifts)

- Refactor the code slightly -- move all the logic inside
   lp_build_lerp_simple().

This change, plus an adjustment in the tolerance of signed normalized
results in piglit fbo-blending-formats, fixes bug 57903.
---
  src/gallium/auxiliary/gallivm/lp_bld_arit.c |   92 +++++++++++++--------------
  1 file changed, 43 insertions(+), 49 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
index 8b19ebd..d930f09 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -685,7 +685,7 @@ lp_build_sub(struct lp_build_context *bld,
  /**
   * Normalized multiplication.
   *
- * There are several approaches here (using 8-bit normalized multiplication as
+ * There are several approaches for (using 8-bit normalized multiplication as
   * an example):
   *
   * - alpha plus one
@@ -694,7 +694,7 @@ lp_build_sub(struct lp_build_context *bld,
   *
 *       a*b/255 ~= (a*(b + 1)) >> 256
   *
- *     which is the fastest method that satisfies the following OpenGL criteria
+ *     which is the fastest method that satisfies the following OpenGL criteria of
   *
   *       0*0 = 0 and 255*255 = 255
   *
@@ -710,7 +710,7 @@ lp_build_sub(struct lp_build_context *bld,
   *
   *     note that just by itself it doesn't satisfies the OpenGL criteria, as
   *     255*255 = 254, so the special case b = 255 must be accounted or roundoff
- *     must be used
+ *     must be used.
   *
   * - geometric series plus rounding
   *
@@ -719,7 +719,9 @@ lp_build_sub(struct lp_build_context *bld,
   *
 *       t/255 ~= (t + (t >> 8) + 0x80) >> 8
   *
- *     achieving the exact results
+ *     achieving the exact results.
+ *
+ *
   *
   * @sa Alvy Ray Smith, Image Compositing Fundamentals, Tech Memo 4, Aug 15, 1995,
   *     ftp://ftp.alvyray.com/Acrobat/4_Comp.pdf
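
Not part of the patch, but for anyone who wants to sanity-check the two
approximations described above, here is a small standalone C sketch (the
function names are mine, purely illustrative; the ">> 256" written above
amounts to a shift by 8, i.e. a divide by 256):

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>

/* "alpha plus one": a*b/255 ~= (a*(b + 1)) >> 8 */
static int
mul_unorm8_biased(int a, int b)
{
   return (a * (b + 1)) >> 8;
}

/* "geometric series plus rounding": t/255 ~= (t + (t >> 8) + 0x80) >> 8 */
static int
mul_unorm8_rounded(int a, int b)
{
   int t = a * b;
   return (t + (t >> 8) + 0x80) >> 8;
}

int
main(void)
{
   int a, b;

   /* OpenGL criteria: 0*0 = 0 and 255*255 = 255 */
   assert(mul_unorm8_biased(0, 0) == 0 && mul_unorm8_biased(255, 255) == 255);
   assert(mul_unorm8_rounded(0, 0) == 0 && mul_unorm8_rounded(255, 255) == 255);

   /* Both variants stay within one step of the correctly rounded quotient. */
   for (a = 0; a <= 255; ++a) {
      for (b = 0; b <= 255; ++b) {
         int ref = (a * b + 127) / 255;   /* a*b/255 rounded to nearest */
         assert(abs(mul_unorm8_biased(a, b) - ref) <= 1);
         assert(abs(mul_unorm8_rounded(a, b) - ref) <= 1);
      }
   }

   printf("ok\n");
   return 0;
}
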
@@ -733,8 +735,7 @@ lp_build_mul_norm(struct gallivm_state *gallivm,
  {
     LLVMBuilderRef builder = gallivm->builder;
     struct lp_build_context bld;
-   unsigned bits;
-   LLVMValueRef shift;
+   unsigned n;
     LLVMValueRef half;
     LLVMValueRef ab;

@@ -744,29 +745,28 @@ lp_build_mul_norm(struct gallivm_state *gallivm,

     lp_build_context_init(&bld, gallivm, wide_type);

-   bits = wide_type.width / 2;
+   n = wide_type.width / 2;
     if (wide_type.sign) {
-      --bits;
+      --n;
     }

-   shift = lp_build_const_int_vec(gallivm, wide_type, bits);
+   /*
+    * TODO: for 16bits normalized SSE2 vectors we could consider using PMULHUW
+    * http://ssp.impulsetrain.com/2011/07/03/multiplying-normalized-16-bit-numbers-with-sse2/
+    */

-#if 0
-
-   /* a*b/255 ~= (a*(b + 1)) >> 256 */
-   /* XXX: This would not work for signed types */
-   assert(!wide_type.sign);
-   b = LLVMBuildAdd(builder, b, lp_build_const_int_vec(gallium, wide_type, 1), "");
-   ab = LLVMBuildMul(builder, a, b, "");
+   /*
+    * a*b / (2**n - 1) ~= (a*b + (a*b >> n) + half) >> n
+    */

-#else
-
-   /* ab/255 ~= (ab + (ab >> 8) + 0x80) >> 8 */
     ab = LLVMBuildMul(builder, a, b, "");
-   ab = LLVMBuildAdd(builder, ab, LLVMBuildLShr(builder, ab, shift, ""), "");
+   ab = LLVMBuildAdd(builder, ab, lp_build_shr_imm(&bld, ab, n), "");

-   /* Add rounding term */
-   half = lp_build_const_int_vec(gallivm, wide_type, 1 << (bits - 1));
+   /*
+    * half = sgn(ab) * 0.5 * (2 ** n) = sgn(ab) * (1 << (n - 1))
+    */
+
+   half = lp_build_const_int_vec(gallivm, wide_type, 1 << (n - 1));
     if (wide_type.sign) {
        LLVMValueRef minus_half = LLVMBuildNeg(builder, half, "");
        LLVMValueRef sign = lp_build_shr_imm(&bld, half, wide_type.width - 1);
@@ -774,9 +774,8 @@ lp_build_mul_norm(struct gallivm_state *gallivm,
     }
     ab = LLVMBuildAdd(builder, ab, half, "");

-#endif
-
-   ab = LLVMBuildLShr(builder, ab, shift, "");
+   /* Final division */
+   ab = lp_build_shr_imm(&bld, ab, n);

     return ab;
  }
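
To make the new signed path easier to follow, here is a scalar C model of
what the code above computes for 8-bit signed normalized values widened to
16 bits (so n = 16/2 - 1 = 7). Again this is only an illustration with my
own names, and it assumes ">>" of a negative int is an arithmetic shift,
matching the ashr the builder emits for signed types:

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>

/*
 * a*b / (2**7 - 1) ~= (a*b + (a*b >> 7) + sgn(a*b) * (1 << 6)) >> 7
 *
 * where a and b are signed normalized values in [-127, 127].
 */
static int
mul_snorm8(int a, int b)
{
   int ab   = a * b;
   int half = (ab < 0) ? -64 : 64;   /* sgn(ab) * (1 << (n - 1)) */

   ab += ab >> 7;                    /* arithmetic shift assumed */
   ab += half;
   return ab >> 7;
}

int
main(void)
{
   int a, b;

   assert(mul_snorm8(0, 0) == 0);
   assert(mul_snorm8(127, 127) == 127);      /*  1.0 *  1.0 =  1.0 */
   assert(mul_snorm8(-127, -127) == 127);    /* -1.0 * -1.0 =  1.0 */

   /*
    * The result stays within one step of the correctly rounded quotient
    * ab/127; e.g. -127*127 lands on -128, which still clamps to -1.0, and
    * is presumably the kind of off-by-one the adjusted piglit tolerance
    * mentioned in the commit message is there to allow.
    */
   for (a = -127; a <= 127; ++a) {
      for (b = -127; b <= 127; ++b) {
         int ab  = a * b;
         int ref = (ab >= 0) ? (ab + 63) / 127 : -((-ab + 63) / 127);
         assert(abs(mul_snorm8(a, b) - ref) <= 1);
      }
   }

   printf("ok\n");
   return 0;
}
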
@@ -988,14 +987,28 @@ lp_build_lerp_simple(struct lp_build_context *bld,

     delta = lp_build_sub(bld, v1, v0);

-   res = lp_build_mul(bld, x, delta);
-
     if (normalized) {
-      if (bld->type.sign) {
-         res = lp_build_shr_imm(bld, res, half_width - 1);
-      } else {
+      if (!bld->type.sign) {
+         /*
+          * Scale x from [0, 2**n - 1] to [0, 2**n] by adding the
+          * most-significant-bit to the lowest-significant-bit, so that
+          * later we can just divide by 2**n instead of 2**n - 1.
+          */
+         x = lp_build_add(bld, x, lp_build_shr_imm(bld, x, half_width - 1));
+
+         /* (x * delta) >> n */
+         res = lp_build_mul(bld, x, delta);
           res = lp_build_shr_imm(bld, res, half_width);
+      } else {
+         /*
+          * The rescaling trick above doesn't work for signed numbers, so
+          * use the 2**n - 1 division approximation in lp_build_mul_norm
+          * instead.
+          */
+         res = lp_build_mul_norm(bld->gallivm, bld->type, x, delta);
        }
+   } else {
+      res = lp_build_mul(bld, x, delta);
     }

     res = lp_build_add(bld, v0, res);
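
Just as an illustration again (my own names, not part of the patch), here
is a scalar model of the unsigned path above: the weight is rescaled from
[0, 255] to [0, 256] so the divide by 255 becomes a shift by 8. The
arithmetic wraps modulo 2**32 the same way the unsigned vector ops wrap
modulo 2**16, and the wrap-around from a "negative" delta cancels out once
the result is truncated back to 8 bits. The signed case cannot use this
rescaling trick and goes through lp_build_mul_norm instead, as sketched
earlier:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* v0 + x*(v1 - v0)/255, approximated with a shift after rescaling x. */
static uint8_t
lerp_unorm8(uint8_t x, uint8_t v0, uint8_t v1)
{
   /* Scale x from [0, 255] to [0, 256] by adding its MSB to its LSB. */
   uint32_t xs = (uint32_t)x + (x >> 7);

   /* Wrapping subtraction and multiplication, then a logical shift; only
    * the low 8 bits of the sum are kept, so the wrap is harmless. */
   uint32_t delta = (uint32_t)v1 - (uint32_t)v0;
   uint32_t res   = (uint32_t)v0 + ((xs * delta) >> 8);

   return (uint8_t)res;
}

int
main(void)
{
   unsigned x, v0, v1;

   for (v0 = 0; v0 <= 255; ++v0) {
      for (v1 = 0; v1 <= 255; ++v1) {
         /* The rescaling makes both endpoints exact... */
         assert(lerp_unorm8(0, v0, v1) == v0);
         assert(lerp_unorm8(255, v0, v1) == v1);
         /* ...and every weight lands between the two endpoints. */
         for (x = 0; x <= 255; ++x) {
            uint8_t r = lerp_unorm8(x, v0, v1);
            assert(r >= (v0 < v1 ? v0 : v1));
            assert(r <= (v0 < v1 ? v1 : v0));
         }
      }
   }

   printf("ok\n");
   return 0;
}
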
@@ -1022,7 +1035,6 @@ lp_build_lerp(struct lp_build_context *bld,
                LLVMValueRef v0,
                LLVMValueRef v1)
  {
-   LLVMBuilderRef builder = bld->gallivm->builder;
     const struct lp_type type = bld->type;
     LLVMValueRef res;

@@ -1034,8 +1046,6 @@ lp_build_lerp(struct lp_build_context *bld,
        struct lp_type wide_type;
        struct lp_build_context wide_bld;
        LLVMValueRef xl, xh, v0l, v0h, v1l, v1h, resl, resh;
-      unsigned bits;
-      LLVMValueRef shift;

      assert(type.length >= 2);

@@ -1055,22 +1065,6 @@ lp_build_lerp(struct lp_build_context *bld,
      lp_build_unpack2(bld->gallivm, type, wide_type, v1, &v1l, &v1h);

        /*
-       * Scale x from [0, 255] to [0, 256]
-       */
-
-      bits = type.width - 1;
-      if (type.sign) {
-         --bits;
-      }
-
-      shift = lp_build_const_int_vec(bld->gallivm, wide_type, bits - 1);
-
-      xl = lp_build_add(&wide_bld, xl,
-                        LLVMBuildAShr(builder, xl, shift, ""));
-      xh = lp_build_add(&wide_bld, xh,
-                        LLVMBuildAShr(builder, xh, shift, ""));
-
-      /*
         * Lerp both halves.
         */


Reviewed-by: Brian Paul <bri...@vmware.com>
