We can't do this all the time: sRGB render targets need blending done in
linear space, and doing that in 4x8 would lose too much precision, so sRGB
formats stay on the float blending path.

total uniforms in shared programs: 32066 -> 32088 (0.07%)
uniforms in affected programs:     327 -> 349 (6.73%)
total instructions in shared programs: 98168 -> 94882 (-3.35%)
instructions in affected programs:     17234 -> 13948 (-19.07%)
---
 src/gallium/drivers/vc4/vc4_context.h         |   5 +-
 src/gallium/drivers/vc4/vc4_nir_lower_blend.c | 286 +++++++++++++++++++++-----
 src/gallium/drivers/vc4/vc4_qir.h             |   2 +
 src/gallium/drivers/vc4/vc4_state.c           |   4 +-
 src/gallium/drivers/vc4/vc4_uniforms.c        |  30 ++-
 5 files changed, 276 insertions(+), 51 deletions(-)

diff --git a/src/gallium/drivers/vc4/vc4_context.h b/src/gallium/drivers/vc4/vc4_context.h
index 654c46f..d3cbaeb 100644
--- a/src/gallium/drivers/vc4/vc4_context.h
+++ b/src/gallium/drivers/vc4/vc4_context.h
@@ -292,7 +292,10 @@ struct vc4_context {
 
         struct vc4_vertex_stateobj *vtx;
 
-        struct pipe_blend_color blend_color;
+        struct {
+                struct pipe_blend_color f;
+                uint8_t ub[4];
+        } blend_color;
         struct pipe_stencil_ref stencil_ref;
         unsigned sample_mask;
         struct pipe_framebuffer_state framebuffer;
diff --git a/src/gallium/drivers/vc4/vc4_nir_lower_blend.c b/src/gallium/drivers/vc4/vc4_nir_lower_blend.c
index a372a6c..6af9be9 100644
--- a/src/gallium/drivers/vc4/vc4_nir_lower_blend.c
+++ b/src/gallium/drivers/vc4/vc4_nir_lower_blend.c
@@ -86,11 +86,11 @@ vc4_nir_srgb_encode(nir_builder *b, nir_ssa_def *linear)
 }
 
 static nir_ssa_def *
-vc4_blend_channel(nir_builder *b,
-                  nir_ssa_def **src,
-                  nir_ssa_def **dst,
-                  unsigned factor,
-                  int channel)
+vc4_blend_channel_f(nir_builder *b,
+                    nir_ssa_def **src,
+                    nir_ssa_def **dst,
+                    unsigned factor,
+                    int channel)
 {
         switch(factor) {
         case PIPE_BLENDFACTOR_ONE:
@@ -146,8 +146,75 @@ vc4_blend_channel(nir_builder *b,
 }
 
 static nir_ssa_def *
-vc4_blend_func(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst,
-               unsigned func)
+vc4_nir_set_packed_chan(nir_builder *b, nir_ssa_def *src0, nir_ssa_def *src1,
+                        int chan)
+{
+        unsigned chan_mask = 0xff << (chan * 8);
+        return nir_ior(b,
+                       nir_iand(b, src0, nir_imm_int(b, ~chan_mask)),
+                       nir_iand(b, src1, nir_imm_int(b, chan_mask)));
+}
+
+static nir_ssa_def *
+vc4_blend_channel_i(nir_builder *b,
+                    nir_ssa_def *src,
+                    nir_ssa_def *dst,
+                    nir_ssa_def *src_a,
+                    nir_ssa_def *dst_a,
+                    unsigned factor,
+                    int a_chan)
+{
+        switch (factor) {
+        case PIPE_BLENDFACTOR_ONE:
+                return nir_imm_int(b, ~0);
+        case PIPE_BLENDFACTOR_SRC_COLOR:
+                return src;
+        case PIPE_BLENDFACTOR_SRC_ALPHA:
+                return src_a;
+        case PIPE_BLENDFACTOR_DST_ALPHA:
+                return dst_a;
+        case PIPE_BLENDFACTOR_DST_COLOR:
+                return dst;
+        case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
+                return vc4_nir_set_packed_chan(b,
+                                               nir_imin_4x8(b,
+                                                            src_a,
+                                                            nir_inot(b, dst_a)),
+                                               nir_imm_int(b, ~0),
+                                               a_chan);
+        case PIPE_BLENDFACTOR_CONST_COLOR:
+                return vc4_nir_get_state_uniform(b, QUNIFORM_BLEND_CONST_COLOR_RGBA);
+        case PIPE_BLENDFACTOR_CONST_ALPHA:
+                return vc4_nir_get_state_uniform(b, QUNIFORM_BLEND_CONST_COLOR_AAAA);
+        case PIPE_BLENDFACTOR_ZERO:
+                return nir_imm_int(b, 0);
+        case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+                return nir_inot(b, src);
+        case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+                return nir_inot(b, src_a);
+        case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+                return nir_inot(b, dst_a);
+        case PIPE_BLENDFACTOR_INV_DST_COLOR:
+                return nir_inot(b, dst);
+        case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+                return nir_inot(b, vc4_nir_get_state_uniform(b, QUNIFORM_BLEND_CONST_COLOR_RGBA));
+        case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+                return nir_inot(b, vc4_nir_get_state_uniform(b, QUNIFORM_BLEND_CONST_COLOR_AAAA));
+
+        default:
+        case PIPE_BLENDFACTOR_SRC1_COLOR:
+        case PIPE_BLENDFACTOR_SRC1_ALPHA:
+        case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
+        case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
+                /* Unsupported. */
+                fprintf(stderr, "Unknown blend factor %d\n", factor);
+                return nir_imm_int(b, ~0);
+        }
+}
+
+static nir_ssa_def *
+vc4_blend_func_f(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst,
+                 unsigned func)
 {
         switch (func) {
         case PIPE_BLEND_ADD:
@@ -169,9 +236,33 @@ vc4_blend_func(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst,
         }
 }
 
+static nir_ssa_def *
+vc4_blend_func_i(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst,
+                 unsigned func)
+{
+        switch (func) {
+        case PIPE_BLEND_ADD:
+                return nir_isadd_4x8(b, src, dst);
+        case PIPE_BLEND_SUBTRACT:
+                return nir_issub_4x8(b, src, dst);
+        case PIPE_BLEND_REVERSE_SUBTRACT:
+                return nir_issub_4x8(b, dst, src);
+        case PIPE_BLEND_MIN:
+                return nir_imin_4x8(b, src, dst);
+        case PIPE_BLEND_MAX:
+                return nir_imax_4x8(b, src, dst);
+
+        default:
+                /* Unsupported. */
+                fprintf(stderr, "Unknown blend func %d\n", func);
+                return src;
+
+        }
+}
+
 static void
-vc4_do_blending(struct vc4_compile *c, nir_builder *b, nir_ssa_def **result,
-                nir_ssa_def **src_color, nir_ssa_def **dst_color)
+vc4_do_blending_f(struct vc4_compile *c, nir_builder *b, nir_ssa_def **result,
+                  nir_ssa_def **src_color, nir_ssa_def **dst_color)
 {
         struct pipe_rt_blend_state *blend = &c->fs_key->blend;
 
@@ -192,20 +283,106 @@ vc4_do_blending(struct vc4_compile *c, nir_builder *b, nir_ssa_def **result,
                 int dst_factor = ((i != 3) ? blend->rgb_dst_factor :
                                   blend->alpha_dst_factor);
                 src_blend[i] = nir_fmul(b, src_color[i],
-                                        vc4_blend_channel(b,
-                                                          src_color, dst_color,
-                                                          src_factor, i));
+                                        vc4_blend_channel_f(b,
+                                                            src_color, dst_color,
+                                                            src_factor, i));
                 dst_blend[i] = nir_fmul(b, dst_color[i],
-                                        vc4_blend_channel(b,
-                                                          src_color, dst_color,
-                                                          dst_factor, i));
+                                        vc4_blend_channel_f(b,
+                                                            src_color, dst_color,
+                                                            dst_factor, i));
         }
 
         for (int i = 0; i < 4; i++) {
-                result[i] = vc4_blend_func(b, src_blend[i], dst_blend[i],
-                                           ((i != 3) ? blend->rgb_func :
-                                            blend->alpha_func));
+                result[i] = vc4_blend_func_f(b, src_blend[i], dst_blend[i],
+                                             ((i != 3) ? blend->rgb_func :
+                                              blend->alpha_func));
+        }
+}
+
+static nir_ssa_def *
+vc4_nir_splat(nir_builder *b, nir_ssa_def *src)
+{
+        nir_ssa_def *or1 = nir_ior(b, src, nir_ishl(b, src, nir_imm_int(b, 8)));
+        return nir_ior(b, or1, nir_ishl(b, or1, nir_imm_int(b, 16)));
+}
+
+static nir_ssa_def *
+vc4_do_blending_i(struct vc4_compile *c, nir_builder *b,
+                  nir_ssa_def *src_color, nir_ssa_def *dst_color,
+                  nir_ssa_def *src_float_a)
+{
+        struct pipe_rt_blend_state *blend = &c->fs_key->blend;
+
+        if (!blend->blend_enable)
+                return src_color;
+
+        enum pipe_format color_format = c->fs_key->color_format;
+        const uint8_t *format_swiz = vc4_get_format_swizzle(color_format);
+        nir_ssa_def *imm_0xff = nir_imm_int(b, 0xff);
+        nir_ssa_def *src_a = nir_pack_unorm_4x8(b, src_float_a);
+        nir_ssa_def *dst_a;
+        int alpha_chan;
+        for (alpha_chan = 0; alpha_chan < 4; alpha_chan++) {
+                if (format_swiz[alpha_chan] == 3)
+                        break;
+        }
+        if (alpha_chan != 4) {
+                nir_ssa_def *shift = nir_imm_int(b, alpha_chan * 8);
+                dst_a = vc4_nir_splat(b, nir_iand(b, nir_ushr(b, dst_color,
+                                                              shift), imm_0xff));
+        } else {
+                dst_a = nir_imm_int(b, ~0);
+        }
+
+        nir_ssa_def *src_factor = vc4_blend_channel_i(b,
+                                                      src_color, dst_color,
+                                                      src_a, dst_a,
+                                                      blend->rgb_src_factor,
+                                                      alpha_chan);
+        nir_ssa_def *dst_factor = vc4_blend_channel_i(b,
+                                                      src_color, dst_color,
+                                                      src_a, dst_a,
+                                                      blend->rgb_dst_factor,
+                                                      alpha_chan);
+
+        if (alpha_chan != 4 &&
+            blend->alpha_src_factor != blend->rgb_src_factor) {
+                nir_ssa_def *src_alpha_factor =
+                        vc4_blend_channel_i(b,
+                                            src_color, dst_color,
+                                            src_a, dst_a,
+                                            blend->alpha_src_factor,
+                                            alpha_chan);
+                src_factor = vc4_nir_set_packed_chan(b, src_factor,
+                                                     src_alpha_factor,
+                                                     alpha_chan);
+        }
+        if (alpha_chan != 4 &&
+            blend->alpha_dst_factor != blend->rgb_dst_factor) {
+                nir_ssa_def *dst_alpha_factor =
+                        vc4_blend_channel_i(b,
+                                            src_color, dst_color,
+                                            src_a, dst_a,
+                                            blend->alpha_dst_factor,
+                                            alpha_chan);
+                dst_factor = vc4_nir_set_packed_chan(b, dst_factor,
+                                                     dst_alpha_factor,
+                                                     alpha_chan);
+        }
+        nir_ssa_def *src_blend = nir_imul_unorm_4x8(b, src_color, src_factor);
+        nir_ssa_def *dst_blend = nir_imul_unorm_4x8(b, dst_color, dst_factor);
+
+        nir_ssa_def *result =
+                vc4_blend_func_i(b, src_blend, dst_blend, blend->rgb_func);
+        if (alpha_chan != 4 && blend->alpha_func != blend->rgb_func) {
+                nir_ssa_def *result_a = vc4_blend_func_i(b,
+                                                         src_blend,
+                                                         dst_blend,
+                                                         blend->alpha_func);
+                result = vc4_nir_set_packed_chan(b, result, result_a,
+                                                 alpha_chan);
         }
+        return result;
 }
 
 static nir_ssa_def *
@@ -299,12 +476,33 @@ vc4_nir_emit_alpha_test_discard(struct vc4_compile *c, nir_builder *b,
         nir_builder_instr_insert(b, &discard->instr);
 }
 
+static nir_ssa_def *
+vc4_nir_swizzle_and_pack(struct vc4_compile *c, nir_builder *b,
+                         nir_ssa_def **colors)
+{
+        enum pipe_format color_format = c->fs_key->color_format;
+        const uint8_t *format_swiz = vc4_get_format_swizzle(color_format);
+
+        nir_ssa_def *swizzled[4];
+        for (int i = 0; i < 4; i++) {
+                swizzled[i] = vc4_nir_get_swizzled_channel(b, colors,
+                                                           format_swiz[i]);
+        }
+
+        return nir_pack_unorm_4x8(b,
+                                  nir_vec4(b,
+                                           swizzled[0], swizzled[1],
+                                           swizzled[2], swizzled[3]));
+
+}
+
 static void
 vc4_nir_lower_blend_instr(struct vc4_compile *c, nir_builder *b,
                           nir_intrinsic_instr *intr)
 {
         enum pipe_format color_format = c->fs_key->color_format;
         const uint8_t *format_swiz = vc4_get_format_swizzle(color_format);
+        bool srgb = util_format_is_srgb(color_format);
 
         /* Pull out the float src/dst color components. */
         nir_ssa_def *packed_dst_color = vc4_nir_get_dst_color(b);
@@ -315,45 +513,39 @@ vc4_nir_lower_blend_instr(struct vc4_compile *c, nir_builder *b,
                 unpacked_dst_color[i] = nir_swizzle(b, dst_vec4, &i, 1, false);
         }
 
-        /* Unswizzle the destination color. */
-        nir_ssa_def *dst_color[4];
-        for (unsigned i = 0; i < 4; i++) {
-                dst_color[i] = vc4_nir_get_swizzled_channel(b,
-                                                            unpacked_dst_color,
-                                                            format_swiz[i]);
-        }
-
         vc4_nir_emit_alpha_test_discard(c, b, src_color[3]);
 
-        /* Turn dst color to linear. */
-        if (util_format_is_srgb(color_format)) {
+        nir_ssa_def *packed_color;
+        if (srgb) {
+                /* Unswizzle the destination color. */
+                nir_ssa_def *dst_color[4];
+                for (unsigned i = 0; i < 4; i++) {
+                        dst_color[i] = vc4_nir_get_swizzled_channel(b,
+                                                                    unpacked_dst_color,
+                                                                    format_swiz[i]);
+                }
+
+                /* Turn dst color to linear. */
                 for (int i = 0; i < 3; i++)
                         dst_color[i] = vc4_nir_srgb_decode(b, dst_color[i]);
-        }
 
-        nir_ssa_def *blend_color[4];
-        vc4_do_blending(c, b, blend_color, src_color, dst_color);
+                nir_ssa_def *blend_color[4];
+                vc4_do_blending_f(c, b, blend_color, src_color, dst_color);
 
-        /* sRGB encode the output color */
-        if (util_format_is_srgb(color_format)) {
+                /* sRGB encode the output color */
                 for (int i = 0; i < 3; i++)
+                        blend_color[i] = vc4_nir_srgb_encode(b, blend_color[i]);
-        }
 
-        nir_ssa_def *swizzled_outputs[4];
-        for (int i = 0; i < 4; i++) {
-                swizzled_outputs[i] =
-                        vc4_nir_get_swizzled_channel(b, blend_color,
-                                                     format_swiz[i]);
-        }
+                packed_color = vc4_nir_swizzle_and_pack(c, b, blend_color);
+        } else {
+                nir_ssa_def *packed_src_color =
+                        vc4_nir_swizzle_and_pack(c, b, src_color);
 
-        nir_ssa_def *packed_color =
-                nir_pack_unorm_4x8(b,
-                                   nir_vec4(b,
-                                            swizzled_outputs[0],
-                                            swizzled_outputs[1],
-                                            swizzled_outputs[2],
-                                            swizzled_outputs[3]));
+                packed_color =
+                        vc4_do_blending_i(c, b,
+                                          packed_src_color, packed_dst_color,
+                                          src_color[3]);
+        }
 
         packed_color = vc4_logicop(b, c->fs_key->logicop_func,
                                    packed_color, packed_dst_color);
diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h
index d032fab..ddb8002 100644
--- a/src/gallium/drivers/vc4/vc4_qir.h
+++ b/src/gallium/drivers/vc4/vc4_qir.h
@@ -253,6 +253,8 @@ enum quniform_contents {
         QUNIFORM_BLEND_CONST_COLOR_Y,
         QUNIFORM_BLEND_CONST_COLOR_Z,
         QUNIFORM_BLEND_CONST_COLOR_W,
+        QUNIFORM_BLEND_CONST_COLOR_RGBA,
+        QUNIFORM_BLEND_CONST_COLOR_AAAA,
 
         QUNIFORM_STENCIL,
 
diff --git a/src/gallium/drivers/vc4/vc4_state.c b/src/gallium/drivers/vc4/vc4_state.c
index 8a759c2..1476946 100644
--- a/src/gallium/drivers/vc4/vc4_state.c
+++ b/src/gallium/drivers/vc4/vc4_state.c
@@ -51,7 +51,9 @@ vc4_set_blend_color(struct pipe_context *pctx,
                     const struct pipe_blend_color *blend_color)
 {
         struct vc4_context *vc4 = vc4_context(pctx);
-        vc4->blend_color = *blend_color;
+        vc4->blend_color.f = *blend_color;
+        for (int i = 0; i < 4; i++)
+                vc4->blend_color.ub[i] = float_to_ubyte(blend_color->color[i]);
         vc4->dirty |= VC4_DIRTY_BLEND_COLOR;
 }
 
diff --git a/src/gallium/drivers/vc4/vc4_uniforms.c b/src/gallium/drivers/vc4/vc4_uniforms.c
index 85d6998..f5ad481 100644
--- a/src/gallium/drivers/vc4/vc4_uniforms.c
+++ b/src/gallium/drivers/vc4/vc4_uniforms.c
@@ -262,11 +262,35 @@ vc4_write_uniforms(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
                 case QUNIFORM_BLEND_CONST_COLOR_Z:
                 case QUNIFORM_BLEND_CONST_COLOR_W:
                         cl_aligned_f(&uniforms,
-                                     CLAMP(vc4->blend_color.color[uinfo->contents[i] -
-                                                                  QUNIFORM_BLEND_CONST_COLOR_X],
+                                     CLAMP(vc4->blend_color.f.color[uinfo->contents[i] -
+                                                                    QUNIFORM_BLEND_CONST_COLOR_X],
                                            0, 1));
                         break;
 
+                case QUNIFORM_BLEND_CONST_COLOR_RGBA: {
+                        const uint8_t *format_swiz =
+                                vc4_get_format_swizzle(vc4->framebuffer.cbufs[0]->format);
+                        uint32_t color = 0;
+                        for (int i = 0; i < 4; i++) {
+                                if (format_swiz[i] >= 4)
+                                        continue;
+
+                                color |= (vc4->blend_color.ub[format_swiz[i]] <<
+                                          (i * 8));
+                        }
+                        cl_aligned_u32(&uniforms, color);
+                        break;
+                }
+
+                case QUNIFORM_BLEND_CONST_COLOR_AAAA: {
+                        uint8_t a = vc4->blend_color.ub[3];
+                        cl_aligned_u32(&uniforms, ((a) |
+                                                   (a << 8) |
+                                                   (a << 16) |
+                                                   (a << 24)));
+                        break;
+                }
+
                 case QUNIFORM_STENCIL:
                         cl_aligned_u32(&uniforms,
                                        vc4->zsa->stencil_uniforms[uinfo->data[i]] |
@@ -330,6 +354,8 @@ vc4_set_shader_uniform_dirty_flags(struct vc4_compiled_shader *shader)
                 case QUNIFORM_BLEND_CONST_COLOR_Y:
                 case QUNIFORM_BLEND_CONST_COLOR_Z:
                 case QUNIFORM_BLEND_CONST_COLOR_W:
+                case QUNIFORM_BLEND_CONST_COLOR_RGBA:
+                case QUNIFORM_BLEND_CONST_COLOR_AAAA:
                         dirty |= VC4_DIRTY_BLEND_COLOR;
                         break;
 
-- 
2.1.4

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to