From: Dave Airlie <airl...@redhat.com>

This adds support for fine derivatives and enables
ARB_derivative_control on radeonsi.

(just fell out of my working out interpolation)

v2: cleanup some bits, write a comment
v2.1: take Michel's comment from the mailing list

Signed-off-by: Dave Airlie <airl...@redhat.com>
---
 docs/GL3.txt                             |  2 +-
 docs/relnotes/10.7.0.html                |  1 +
 src/gallium/drivers/radeonsi/si_pipe.c   |  2 +-
 src/gallium/drivers/radeonsi/si_shader.c | 52 +++++++++++++++++++++++++++++---
 4 files changed, 50 insertions(+), 7 deletions(-)

diff --git a/docs/GL3.txt b/docs/GL3.txt
index e3fa1a1..15bb57f 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -191,7 +191,7 @@ GL 4.5, GLSL 4.50:
   GL_ARB_clip_control                                  DONE (i965, nv50, nvc0, 
r600, radeonsi, llvmpipe, softpipe)
   GL_ARB_conditional_render_inverted                   DONE (i965, nv50, nvc0, 
llvmpipe, softpipe)
   GL_ARB_cull_distance                                 in progress (Tobias)
-  GL_ARB_derivative_control                            DONE (i965, nv50, nvc0, 
r600)
+  GL_ARB_derivative_control                            DONE (i965, nv50, nvc0, 
r600, radeonsi)
   GL_ARB_direct_state_access                           DONE (all drivers)
   GL_ARB_get_texture_sub_image                         DONE (all drivers)
   GL_ARB_shader_texture_image_samples                  not started
diff --git a/docs/relnotes/10.7.0.html b/docs/relnotes/10.7.0.html
index 26615a8..afef525 100644
--- a/docs/relnotes/10.7.0.html
+++ b/docs/relnotes/10.7.0.html
@@ -45,6 +45,7 @@ Note: some of the new features are only available with 
certain drivers.
 
 <ul>
 <li>GL_AMD_vertex_shader_viewport_index on radeonsi</li>
+<li>GL_ARB_derivative_control on radeonsi</li>
 <li>GL_ARB_fragment_layer_viewport on radeonsi</li>
 <li>GL_ARB_framebuffer_no_attachments on i965</li>
 <li>GL_ARB_get_texture_sub_image for all drivers</li>
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
b/src/gallium/drivers/radeonsi/si_pipe.c
index c2985b8..ebe1f5a 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -249,6 +249,7 @@ static int si_get_param(struct pipe_screen* pscreen, enum 
pipe_cap param)
        case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
        case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
        case PIPE_CAP_TGSI_TEXCOORD:
+       case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
                return 1;
 
        case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
@@ -289,7 +290,6 @@ static int si_get_param(struct pipe_screen* pscreen, enum 
pipe_cap param)
        case PIPE_CAP_USER_VERTEX_BUFFERS:
        case PIPE_CAP_FAKE_SW_MSAA:
        case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
-       case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
        case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
        case PIPE_CAP_SAMPLER_VIEW_TARGET:
        case PIPE_CAP_VERTEXID_NOBASE:
diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 81f7bdb..fee427f 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2841,6 +2841,35 @@ static void build_txq_intrinsic(const struct 
lp_build_tgsi_action * action,
        }
 }
 
+/*
+ * SI implements derivatives using the local data store (LDS)
+ * All writes to the LDS happen in all executing threads at
+ * the same time. TID is the Thread ID for the current
+ * thread and is a value between 0 and 63, representing
+ * the thread's position in the wavefront.
+ *
+ * For the pixel shader threads are grouped into quads of four pixels.
+ * The TIDs of the pixels of a quad are:
+ *
+ *  +------+------+
+ *  |4n + 0|4n + 1|
+ *  +------+------+
+ *  |4n + 2|4n + 3|
+ *  +------+------+
+ *
+ * So, masking the TID with 0xfffffffc yields the TID of the top left pixel
+ * of the quad, masking with 0xfffffffd yields the TID of the top pixel of
+ * the current pixel's column, and masking with 0xfffffffe yields the TID
+ * of the left pixel of the current pixel's row.
+ *
+ * Adding 1 yields the TID of the pixel to the right of the left pixel, and
+ * adding 2 yields the TID of the pixel below the top pixel.
+ */
+/* masks for thread ID. */
+#define TID_MASK_TOP_LEFT 0xfffffffc
+#define TID_MASK_TOP      0xfffffffd
+#define TID_MASK_LEFT     0xfffffffe
+
 static void si_llvm_emit_ddxy(
        const struct lp_build_tgsi_action * action,
        struct lp_build_tgsi_context * bld_base,
@@ -2857,6 +2886,8 @@ static void si_llvm_emit_ddxy(
        LLVMTypeRef i32;
        unsigned swizzle[4];
        unsigned c;
+       int idx;
+       unsigned mask;
 
        i32 = LLVMInt32TypeInContext(gallivm->context);
 
@@ -2866,15 +2897,22 @@ static void si_llvm_emit_ddxy(
        store_ptr = LLVMBuildGEP(gallivm->builder, si_shader_ctx->lds,
                                 indices, 2, "");
 
+       if (opcode == TGSI_OPCODE_DDX_FINE)
+               mask = TID_MASK_LEFT;
+       else if (opcode == TGSI_OPCODE_DDY_FINE)
+               mask = TID_MASK_TOP;
+       else
+               mask = TID_MASK_TOP_LEFT;
+
        indices[1] = LLVMBuildAnd(gallivm->builder, indices[1],
-                                 lp_build_const_int32(gallivm, 0xfffffffc), 
"");
+                                 lp_build_const_int32(gallivm, mask), "");
        load_ptr0 = LLVMBuildGEP(gallivm->builder, si_shader_ctx->lds,
                                 indices, 2, "");
 
+       /* for DDX we want to next X pixel, DDY next Y pixel. */
+       idx = (opcode == TGSI_OPCODE_DDX || opcode == TGSI_OPCODE_DDX_FINE) ? 1 
: 2;
        indices[1] = LLVMBuildAdd(gallivm->builder, indices[1],
-                                 lp_build_const_int32(gallivm,
-                                                      opcode == 
TGSI_OPCODE_DDX ? 1 : 2),
-                                 "");
+                                 lp_build_const_int32(gallivm, idx), "");
        load_ptr1 = LLVMBuildGEP(gallivm->builder, si_shader_ctx->lds,
                                 indices, 2, "");
 
@@ -3216,7 +3254,9 @@ static void create_function(struct si_shader_context 
*si_shader_ctx)
 
        if (bld_base->info &&
            (bld_base->info->opcode_count[TGSI_OPCODE_DDX] > 0 ||
-            bld_base->info->opcode_count[TGSI_OPCODE_DDY] > 0))
+            bld_base->info->opcode_count[TGSI_OPCODE_DDY] > 0 ||
+            bld_base->info->opcode_count[TGSI_OPCODE_DDX_FINE] > 0 ||
+            bld_base->info->opcode_count[TGSI_OPCODE_DDY_FINE] > 0))
                si_shader_ctx->lds =
                        LLVMAddGlobalInAddressSpace(gallivm->module,
                                                    LLVMArrayType(i32, 64),
@@ -3709,6 +3749,8 @@ int si_shader_create(struct si_screen *sscreen, 
LLVMTargetMachineRef tm,
 
        bld_base->op_actions[TGSI_OPCODE_DDX].emit = si_llvm_emit_ddxy;
        bld_base->op_actions[TGSI_OPCODE_DDY].emit = si_llvm_emit_ddxy;
+       bld_base->op_actions[TGSI_OPCODE_DDX_FINE].emit = si_llvm_emit_ddxy;
+       bld_base->op_actions[TGSI_OPCODE_DDY_FINE].emit = si_llvm_emit_ddxy;
 
        bld_base->op_actions[TGSI_OPCODE_EMIT].emit = si_llvm_emit_vertex;
        bld_base->op_actions[TGSI_OPCODE_ENDPRIM].emit = si_llvm_emit_primitive;
-- 
2.4.3

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to