Requires evergreen/cayman

Signed-off-by: Glenn Kennard <glenn.kenn...@gmail.com>
---
This patch depends on the patch "r600g: Implement GL_ARB_sample_shading".

Implementation note: interpolateAtSample/Offset does an affine rather
than a perspective-correct reverse projection. I think that for the
small intra-pixel offsets used this should typically give more accurate
results, because no bits are lost to the reciprocal and the additional
multiplication. As a bonus it takes slightly fewer ops, and as far as I
can tell it is within spec to skip the perspective correction.

 docs/GL3.txt                               |   2 +-
 src/gallium/drivers/r600/evergreen_state.c |   3 +
 src/gallium/drivers/r600/r600_shader.c     | 211 ++++++++++++++++++++++++++++-
 3 files changed, 211 insertions(+), 5 deletions(-)

diff --git a/docs/GL3.txt b/docs/GL3.txt
index 5baacc1..8e69068 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -107,7 +107,7 @@ GL 4.0, GLSL 4.00:
   - Geometry shader instancing                         DONE (r600)
   - Geometry shader multiple streams                   DONE ()
   - Enhanced per-sample shading                        DONE (r600)
-  - Interpolation functions                            DONE ()
+  - Interpolation functions                            DONE (r600)
   - New overload resolution rules                      DONE
   GL_ARB_gpu_shader_fp64                               started (Dave)
   GL_ARB_sample_shading                                DONE (i965, nv50, nvc0, 
r600, radeonsi)
diff --git a/src/gallium/drivers/r600/evergreen_state.c 
b/src/gallium/drivers/r600/evergreen_state.c
index 9531893..b857292 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -1427,6 +1427,9 @@ static void evergreen_set_framebuffer_state(struct 
pipe_context *ctx,
                assert(rctx->framebuffer.nr_samples <= Elements(values)/4);
                for (i = 0; i < rctx->framebuffer.nr_samples; i++) {
                        ctx->get_sample_position(ctx, 
rctx->framebuffer.nr_samples, i, &values[4*i]);
+                       /* interpolateAtSample needs offset from center point */
+                       values[4*i+2] = values[4*i+0] - 0.5f;
+                       values[4*i+3] = values[4*i+1] - 0.5f;
                }
                constbuf.user_buffer = values;
                constbuf.buffer_size = rctx->framebuffer.nr_samples * 4 * 4;
diff --git a/src/gallium/drivers/r600/r600_shader.c 
b/src/gallium/drivers/r600/r600_shader.c
index fd7d845..7e5a526 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -294,6 +294,8 @@ struct r600_shader_ctx {
        int                                     face_gpr;
        /* sample id is .w component stored in fixed point position register */
        int                                     fixed_pt_position_gpr;
+       /* evergreen/cayman needs to add shader->input entries for 
SAMPLEMASK/SAMPLEID */
+       int                             num_system_value_inputs;
        int                                     colors_used;
        boolean                 clip_vertex_write;
        unsigned                cv_output;
@@ -778,6 +780,8 @@ static int allocate_system_value_inputs(struct 
r600_shader_ctx *ctx, int gpr_off
                }
        }
 
+       ctx->num_system_value_inputs = num_regs;
+
        return num_regs;
 }
 
@@ -814,7 +818,7 @@ static int evergreen_gpr_count(struct r600_shader_ctx *ctx)
                        ctx->eg_interpolators[k].enabled = TRUE;
        }
 
-       /* Need to scan shader for system values */
+       /* Need to scan shader for interpolateAtSample/Offset/Centroid */
        if (tgsi_parse_init(&parse, ctx->tokens) != TGSI_PARSE_OK) {
                return 0;
        }
@@ -824,7 +828,31 @@ static int evergreen_gpr_count(struct r600_shader_ctx *ctx)
        while (!tgsi_parse_end_of_tokens(&parse)) {
                tgsi_parse_token(&parse);
 
-               if (parse.FullToken.Token.Type == TGSI_TOKEN_TYPE_DECLARATION) {
+               if (parse.FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION) {
+                       const struct tgsi_full_instruction *inst
+                               = &parse.FullToken.FullInstruction;
+                       if (inst->Instruction.Opcode == 
TGSI_OPCODE_INTERP_SAMPLE ||
+                               inst->Instruction.Opcode == 
TGSI_OPCODE_INTERP_OFFSET ||
+                               inst->Instruction.Opcode == 
TGSI_OPCODE_INTERP_CENTROID)
+                       {
+                               int interpolate, location, k;
+
+                               if (inst->Instruction.Opcode == 
TGSI_OPCODE_INTERP_SAMPLE) {
+                                       location = TGSI_INTERPOLATE_LOC_CENTER;
+                                       has_sampleid = true;
+                               } else if (inst->Instruction.Opcode == 
TGSI_OPCODE_INTERP_OFFSET) {
+                                       location = TGSI_INTERPOLATE_LOC_CENTER;
+                                       /* Needs sample positions, currently 
those are always available */
+                               } else {
+                                       location = 
TGSI_INTERPOLATE_LOC_CENTROID;
+                               }
+
+                               interpolate = 
ctx->info.input_interpolate[inst->Src[0].Register.Index];
+                               k = eg_get_interpolator_index(interpolate, 
location);
+                               ctx->eg_interpolators[k].enabled = true;
+                       }
+               }
+               else if (parse.FullToken.Token.Type == 
TGSI_TOKEN_TYPE_DECLARATION) {
                        struct tgsi_full_declaration *d = 
&parse.FullToken.FullDeclaration;
                        if (d->Declaration.File == TGSI_FILE_SYSTEM_VALUE) {
                                if (d->Semantic.Name == 
TGSI_SEMANTIC_SAMPLEMASK) {
@@ -893,8 +921,8 @@ static int load_sample_position(struct r600_shader_ctx 
*ctx, struct r600_shader_
        vtx.dst_gpr = t1;
        vtx.dst_sel_x = 0;
        vtx.dst_sel_y = 1;
-       vtx.dst_sel_z = 7;
-       vtx.dst_sel_w = 7;
+       vtx.dst_sel_z = 2;
+       vtx.dst_sel_w = 3;
        vtx.data_format = FMT_32_32_32_32_FLOAT;
        vtx.num_format_all = 2;
        vtx.format_comp_all = 1;
@@ -1767,6 +1795,7 @@ static int r600_shader_from_tgsi(struct r600_context 
*rctx,
 
        ctx.face_gpr = -1;
        ctx.fixed_pt_position_gpr = -1;
+       ctx.num_system_value_inputs = 0;
        ctx.fragcoord_input = -1;
        ctx.colors_used = 0;
        ctx.clip_vertex_write = 0;
@@ -4550,6 +4579,171 @@ static int tgsi_msb(struct r600_shader_ctx *ctx)
        return 0;
 }
 
+static int tgsi_interp_egcm(struct r600_shader_ctx *ctx)
+{      /*
+        * Emit bytecode for the TGSI INTERP_CENTROID / INTERP_SAMPLE /
+        * INTERP_OFFSET instruction currently held in ctx->parse.
+        *
+        * Steps: select the interpolator (i,j) pair for the input; for
+        * SAMPLE/OFFSET displace that pair by an x/y offset using its
+        * horizontal and vertical gradients; run the INTERP_ZW/INTERP_XY ops
+        * against the input's parameter; finally MOV the result into the
+        * destination.
+        *
+        * Returns 0 on success, otherwise the non-zero value returned by the
+        * failing r600_bytecode_add_tex()/r600_bytecode_add_alu() call.
+        */
+       struct tgsi_full_instruction *inst = 
&ctx->parse.FullToken.FullInstruction;
+       struct r600_bytecode_alu alu;
+       int r, i = 0, k, interp_gpr, interp_base_chan, tmp, lasti;
+       unsigned location;
+       int input;
+
+       assert(inst->Src[0].Register.File == TGSI_FILE_INPUT);
+
+       input = inst->Src[0].Register.Index + ctx->num_system_value_inputs; /* index into shader->input[], shifted by the system value input count */
+
+       /* Interpolators have been marked for use already by 
allocate_system_value_inputs */
+       if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET ||
+               inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
+               location = TGSI_INTERPOLATE_LOC_CENTER; /* sample offset will 
be added explicitly */
+       }
+       else {
+               location = TGSI_INTERPOLATE_LOC_CENTROID;
+       }
+
+       k = eg_get_interpolator_index(ctx->shader->input[input].interpolate, 
location);
+       if (k < 0)
+               k = 0; /* negative index: fall back to interpolator entry 0 */
+       interp_gpr = ctx->eg_interpolators[k].ij_index / 2; /* two (i,j) pairs share one GPR */
+       interp_base_chan = 2 * (ctx->eg_interpolators[k].ij_index % 2); /* pair starts at channel 0 or 2 */
+
+       /* NOTE: currently offset is not perspective correct.
+        * The (i,j) pair is displaced with two MULADDs per component c:
+        *   temp_reg[c] = ij[c] + gradientsH[c] * offset.x
+        *   temp_reg[c] = temp_reg[c] + gradientsV[c] * offset.y */
+       if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET ||
+               inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
+               int sample_gpr = -1;
+               int gradientsH, gradientsV;
+               struct r600_bytecode_tex tex;
+
+               if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
+                       sample_gpr = load_sample_position(ctx, &ctx->src[1], 
ctx->src[1].swizzle[0]); /* channels 2 and 3 of this GPR are used below as the x/y offset */
+               }
+
+               gradientsH = r600_get_temp(ctx);
+               gradientsV = r600_get_temp(ctx);
+               for (i = 0; i < 2; i++) { /* i == 0: horizontal gradients, i == 1: vertical gradients */
+                       memset(&tex, 0, sizeof(struct r600_bytecode_tex));
+                       tex.op = i == 0 ? FETCH_OP_GET_GRADIENTS_H : 
FETCH_OP_GET_GRADIENTS_V;
+                       tex.src_gpr = interp_gpr;
+                       tex.src_sel_x = interp_base_chan + 0;
+                       tex.src_sel_y = interp_base_chan + 1;
+                       tex.src_sel_z = 0;
+                       tex.src_sel_w = 0;
+                       tex.dst_gpr = i == 0 ? gradientsH : gradientsV;
+                       tex.dst_sel_x = 0;
+                       tex.dst_sel_y = 1;
+                       tex.dst_sel_z = 7; /* NOTE(review): 7 presumably masks the channel - confirm */
+                       tex.dst_sel_w = 7;
+                       tex.inst_mod = 1; // Use per pixel gradient calculation
+                       tex.sampler_id = 0;
+                       tex.resource_id = tex.sampler_id;
+                       r = r600_bytecode_add_tex(ctx->bc, &tex);
+                       if (r)
+                               return r;
+               }
+
+               for (i = 0; i < 2; i++) { /* temp_reg[i] = gradientsH[i] * offset.x + ij[i] */
+                       memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+                       alu.op = ALU_OP3_MULADD;
+                       alu.is_op3 = 1;
+                       alu.src[0].sel = gradientsH;
+                       alu.src[0].chan = i;
+                       if (inst->Instruction.Opcode == 
TGSI_OPCODE_INTERP_SAMPLE) {
+                               alu.src[1].sel = sample_gpr;
+                               alu.src[1].chan = 2; /* x offset taken from the loaded sample position */
+                       }
+                       else {
+                               r600_bytecode_src(&alu.src[1], &ctx->src[1], 0); /* x offset taken from TGSI src1 */
+                       }
+                       alu.src[2].sel = interp_gpr;
+                       alu.src[2].chan = interp_base_chan + i;
+                       alu.dst.sel = ctx->temp_reg;
+                       alu.dst.chan = i;
+                       alu.last = i == 1;
+
+                       r = r600_bytecode_add_alu(ctx->bc, &alu);
+                       if (r)
+                               return r;
+               }
+
+               for (i = 0; i < 2; i++) { /* temp_reg[i] = gradientsV[i] * offset.y + temp_reg[i] */
+                       memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+                       alu.op = ALU_OP3_MULADD;
+                       alu.is_op3 = 1;
+                       alu.src[0].sel = gradientsV;
+                       alu.src[0].chan = i;
+                       if (inst->Instruction.Opcode == 
TGSI_OPCODE_INTERP_SAMPLE) {
+                               alu.src[1].sel = sample_gpr;
+                               alu.src[1].chan = 3; /* y offset taken from the loaded sample position */
+                       }
+                       else {
+                               r600_bytecode_src(&alu.src[1], &ctx->src[1], 1); /* y offset taken from TGSI src1 */
+                       }
+                       alu.src[2].sel = ctx->temp_reg;
+                       alu.src[2].chan = i;
+                       alu.dst.sel = ctx->temp_reg;
+                       alu.dst.chan = i;
+                       alu.last = i == 1;
+
+                       r = r600_bytecode_add_alu(ctx->bc, &alu);
+                       if (r)
+                               return r;
+               }
+       }
+
+       tmp = r600_get_temp(ctx); /* receives the INTERP results before the final copy */
+       for (i = 0; i < 8; i++) { /* i 0-3 emit INTERP_ZW, i 4-7 emit INTERP_XY */
+               memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+               alu.op = i < 4 ? ALU_OP2_INTERP_ZW : ALU_OP2_INTERP_XY;
+
+               alu.dst.sel = tmp;
+               if ((i > 1 && i < 6)) { /* write only chans 2,3 of the ZW group and chans 0,1 of the XY group */
+                       alu.dst.write = 1;
+               }
+               else {
+                       alu.dst.write = 0;
+               }
+               alu.dst.chan = i % 4;
+
+               if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET ||
+                       inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
+                       alu.src[0].sel = ctx->temp_reg; /* displaced (i,j) computed above */
+                       alu.src[0].chan = 1 - (i % 2);
+               } else {
+                       alu.src[0].sel = interp_gpr; /* unmodified centroid (i,j) */
+                       alu.src[0].chan = interp_base_chan + 1 - (i % 2);
+               }
+               alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + 
ctx->shader->input[input].lds_pos;
+               alu.src[1].chan = 0;
+
+               alu.last = i % 4 == 3; /* each group of four ops ends with 'last' set */
+               alu.bank_swizzle_force = SQ_ALU_VEC_210;
+
+               r = r600_bytecode_add_alu(ctx->bc, &alu);
+               if (r)
+                       return r;
+       }
+
+       // INTERP can't swizzle dst, so src0's swizzle is applied by the MOVs below
+       lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
+       for (i = 0; i <= lasti; i++) {
+               if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
+                       continue; /* channel not in the destination write mask */
+
+               memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+               alu.op = ALU_OP1_MOV;
+               alu.src[0].sel = tmp;
+               alu.src[0].chan = ctx->src[0].swizzle[i];
+               tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+               alu.dst.write = 1;
+               alu.last = i == lasti;
+               r = r600_bytecode_add_alu(ctx->bc, &alu);
+               if (r)
+                       return r;
+       }
+
+       return 0;
+}
+
+
+
+
 static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct 
tgsi_full_instruction *inst)
 {
        struct r600_bytecode_alu alu;
@@ -7076,6 +7270,9 @@ static struct r600_shader_tgsi_instruction 
r600_shader_tgsi_instruction[] = {
        {TGSI_OPCODE_LSB,       0, ALU_OP1_FFBL_INT, tgsi_unsupported},
        {TGSI_OPCODE_IMSB,      0, ALU_OP1_FFBH_INT, tgsi_unsupported},
        {TGSI_OPCODE_UMSB,      0, ALU_OP1_FFBH_UINT, tgsi_unsupported},
+       {TGSI_OPCODE_INTERP_CENTROID,   0, ALU_OP0_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_INTERP_SAMPLE,             0, ALU_OP0_NOP, 
tgsi_unsupported},
+       {TGSI_OPCODE_INTERP_OFFSET,             0, ALU_OP0_NOP, 
tgsi_unsupported},
        {TGSI_OPCODE_LAST,      0, ALU_OP0_NOP, tgsi_unsupported},
 };
 
@@ -7278,6 +7475,9 @@ static struct r600_shader_tgsi_instruction 
eg_shader_tgsi_instruction[] = {
        {TGSI_OPCODE_LSB,       0, ALU_OP1_FFBL_INT, tgsi_op2},
        {TGSI_OPCODE_IMSB,      0, ALU_OP1_FFBH_INT, tgsi_msb},
        {TGSI_OPCODE_UMSB,      0, ALU_OP1_FFBH_UINT, tgsi_msb},
+       {TGSI_OPCODE_INTERP_CENTROID,   0, ALU_OP0_NOP, tgsi_interp_egcm},
+       {TGSI_OPCODE_INTERP_SAMPLE,             0, ALU_OP0_NOP, 
tgsi_interp_egcm},
+       {TGSI_OPCODE_INTERP_OFFSET,             0, ALU_OP0_NOP, 
tgsi_interp_egcm},
        {TGSI_OPCODE_LAST,      0, ALU_OP0_NOP, tgsi_unsupported},
 };
 
@@ -7481,5 +7681,8 @@ static struct r600_shader_tgsi_instruction 
cm_shader_tgsi_instruction[] = {
        {TGSI_OPCODE_LSB,       0, ALU_OP1_FFBL_INT, tgsi_op2},
        {TGSI_OPCODE_IMSB,      0, ALU_OP1_FFBH_INT, tgsi_msb},
        {TGSI_OPCODE_UMSB,      0, ALU_OP1_FFBH_UINT, tgsi_msb},
+       {TGSI_OPCODE_INTERP_CENTROID,   0, ALU_OP0_NOP, tgsi_interp_egcm},
+       {TGSI_OPCODE_INTERP_SAMPLE,             0, ALU_OP0_NOP, 
tgsi_interp_egcm},
+       {TGSI_OPCODE_INTERP_OFFSET,             0, ALU_OP0_NOP, 
tgsi_interp_egcm},
        {TGSI_OPCODE_LAST,      0, ALU_OP0_NOP, tgsi_unsupported},
 };
-- 
1.9.1

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to