Reviewed-by: Marek Olšák <marek.ol...@amd.com> Marek
On Sat, Jul 25, 2015 at 2:14 AM, Dave Airlie <airl...@gmail.com> wrote: > From: Dave Airlie <airl...@redhat.com> > > This adds support for fine derivatives and enables > ARB_derivative_control on radeonsi. > > (just fell out of my working out interpolation) > > v2: cleanup some bits, write a comment > v2.1: take Michel's comment from the mailing list > > Signed-off-by: Dave Airlie <airl...@redhat.com> > --- > docs/GL3.txt | 2 +- > docs/relnotes/10.7.0.html | 1 + > src/gallium/drivers/radeonsi/si_pipe.c | 2 +- > src/gallium/drivers/radeonsi/si_shader.c | 52 > +++++++++++++++++++++++++++++--- > 4 files changed, 50 insertions(+), 7 deletions(-) > > diff --git a/docs/GL3.txt b/docs/GL3.txt > index e3fa1a1..15bb57f 100644 > --- a/docs/GL3.txt > +++ b/docs/GL3.txt > @@ -191,7 +191,7 @@ GL 4.5, GLSL 4.50: > GL_ARB_clip_control DONE (i965, nv50, > nvc0, r600, radeonsi, llvmpipe, softpipe) > GL_ARB_conditional_render_inverted DONE (i965, nv50, > nvc0, llvmpipe, softpipe) > GL_ARB_cull_distance in progress (Tobias) > - GL_ARB_derivative_control DONE (i965, nv50, > nvc0, r600) > + GL_ARB_derivative_control DONE (i965, nv50, > nvc0, r600, radeonsi) > GL_ARB_direct_state_access DONE (all drivers) > GL_ARB_get_texture_sub_image DONE (all drivers) > GL_ARB_shader_texture_image_samples not started > diff --git a/docs/relnotes/10.7.0.html b/docs/relnotes/10.7.0.html > index 26615a8..afef525 100644 > --- a/docs/relnotes/10.7.0.html > +++ b/docs/relnotes/10.7.0.html > @@ -45,6 +45,7 @@ Note: some of the new features are only available with > certain drivers. 
> > <ul> > <li>GL_AMD_vertex_shader_viewport_index on radeonsi</li> > +<li>GL_ARB_derivative_control on radeonsi</li> > <li>GL_ARB_fragment_layer_viewport on radeonsi</li> > <li>GL_ARB_framebuffer_no_attachments on i965</li> > <li>GL_ARB_get_texture_sub_image for all drivers</li> > diff --git a/src/gallium/drivers/radeonsi/si_pipe.c > b/src/gallium/drivers/radeonsi/si_pipe.c > index c2985b8..ebe1f5a 100644 > --- a/src/gallium/drivers/radeonsi/si_pipe.c > +++ b/src/gallium/drivers/radeonsi/si_pipe.c > @@ -249,6 +249,7 @@ static int si_get_param(struct pipe_screen* pscreen, enum > pipe_cap param) > case PIPE_CAP_MULTISAMPLE_Z_RESOLVE: > case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION: > case PIPE_CAP_TGSI_TEXCOORD: > + case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE: > return 1; > > case PIPE_CAP_RESOURCE_FROM_USER_MEMORY: > @@ -289,7 +290,6 @@ static int si_get_param(struct pipe_screen* pscreen, enum > pipe_cap param) > case PIPE_CAP_USER_VERTEX_BUFFERS: > case PIPE_CAP_FAKE_SW_MSAA: > case PIPE_CAP_TEXTURE_GATHER_OFFSETS: > - case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE: > case PIPE_CAP_CONDITIONAL_RENDER_INVERTED: > case PIPE_CAP_SAMPLER_VIEW_TARGET: > case PIPE_CAP_VERTEXID_NOBASE: > diff --git a/src/gallium/drivers/radeonsi/si_shader.c > b/src/gallium/drivers/radeonsi/si_shader.c > index 81f7bdb..fee427f 100644 > --- a/src/gallium/drivers/radeonsi/si_shader.c > +++ b/src/gallium/drivers/radeonsi/si_shader.c > @@ -2841,6 +2841,35 @@ static void build_txq_intrinsic(const struct > lp_build_tgsi_action * action, > } > } > > +/* > + * SI implements derivatives using the local data store (LDS) > + * All writes to the LDS happen in all executing threads at > + * the same time. TID is the Thread ID for the current > + * thread and is a value between 0 and 63, representing > + * the thread's position in the wavefront. > + * > + * For the pixel shader threads are grouped into quads of four pixels. 
> + * The TIDs of the pixels of a quad are: > + * > + * +------+------+ > + * |4n + 0|4n + 1| > + * +------+------+ > + * |4n + 2|4n + 3| > + * +------+------+ > + * > + * So, masking the TID with 0xfffffffc yields the TID of the top left pixel > + * of the quad, masking with 0xfffffffd yields the TID of the top pixel of > + * the current pixel's column, and masking with 0xfffffffe yields the TID > + * of the left pixel of the current pixel's row. > + * > + * Adding 1 yields the TID of the pixel to the right of the left pixel, and > + * adding 2 yields the TID of the pixel below the top pixel. > + */ > +/* masks for thread ID. */ > +#define TID_MASK_TOP_LEFT 0xfffffffc > +#define TID_MASK_TOP 0xfffffffd > +#define TID_MASK_LEFT 0xfffffffe > + > static void si_llvm_emit_ddxy( > const struct lp_build_tgsi_action * action, > struct lp_build_tgsi_context * bld_base, > @@ -2857,6 +2886,8 @@ static void si_llvm_emit_ddxy( > LLVMTypeRef i32; > unsigned swizzle[4]; > unsigned c; > + int idx; > + unsigned mask; > > i32 = LLVMInt32TypeInContext(gallivm->context); > > @@ -2866,15 +2897,22 @@ static void si_llvm_emit_ddxy( > store_ptr = LLVMBuildGEP(gallivm->builder, si_shader_ctx->lds, > indices, 2, ""); > > + if (opcode == TGSI_OPCODE_DDX_FINE) > + mask = TID_MASK_LEFT; > + else if (opcode == TGSI_OPCODE_DDY_FINE) > + mask = TID_MASK_TOP; > + else > + mask = TID_MASK_TOP_LEFT; > + > indices[1] = LLVMBuildAnd(gallivm->builder, indices[1], > - lp_build_const_int32(gallivm, 0xfffffffc), > ""); > + lp_build_const_int32(gallivm, mask), ""); > load_ptr0 = LLVMBuildGEP(gallivm->builder, si_shader_ctx->lds, > indices, 2, ""); > > + /* for DDX we want the next X pixel, for DDY the next Y pixel. */ > + idx = (opcode == TGSI_OPCODE_DDX || opcode == TGSI_OPCODE_DDX_FINE) ? > 1 : 2; > indices[1] = LLVMBuildAdd(gallivm->builder, indices[1], > - lp_build_const_int32(gallivm, > - opcode == > TGSI_OPCODE_DDX ?
1 : 2), > - ""); > + lp_build_const_int32(gallivm, idx), ""); > load_ptr1 = LLVMBuildGEP(gallivm->builder, si_shader_ctx->lds, > indices, 2, ""); > > @@ -3216,7 +3254,9 @@ static void create_function(struct si_shader_context > *si_shader_ctx) > > if (bld_base->info && > (bld_base->info->opcode_count[TGSI_OPCODE_DDX] > 0 || > - bld_base->info->opcode_count[TGSI_OPCODE_DDY] > 0)) > + bld_base->info->opcode_count[TGSI_OPCODE_DDY] > 0 || > + bld_base->info->opcode_count[TGSI_OPCODE_DDX_FINE] > 0 || > + bld_base->info->opcode_count[TGSI_OPCODE_DDY_FINE] > 0)) > si_shader_ctx->lds = > LLVMAddGlobalInAddressSpace(gallivm->module, > LLVMArrayType(i32, 64), > @@ -3709,6 +3749,8 @@ int si_shader_create(struct si_screen *sscreen, > LLVMTargetMachineRef tm, > > bld_base->op_actions[TGSI_OPCODE_DDX].emit = si_llvm_emit_ddxy; > bld_base->op_actions[TGSI_OPCODE_DDY].emit = si_llvm_emit_ddxy; > + bld_base->op_actions[TGSI_OPCODE_DDX_FINE].emit = si_llvm_emit_ddxy; > + bld_base->op_actions[TGSI_OPCODE_DDY_FINE].emit = si_llvm_emit_ddxy; > > bld_base->op_actions[TGSI_OPCODE_EMIT].emit = si_llvm_emit_vertex; > bld_base->op_actions[TGSI_OPCODE_ENDPRIM].emit = > si_llvm_emit_primitive; > -- > 2.4.3 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev