From: Nicolai Hähnle <nicolai.haeh...@amd.com>

---
 docs/features.txt                        |  2 +-
 docs/relnotes/17.1.0.html                |  1 +
 src/gallium/drivers/radeonsi/si_pipe.c   |  4 +-
 src/gallium/drivers/radeonsi/si_shader.c | 82 ++++++++++++++++++++++++++++++++
 4 files changed, 87 insertions(+), 2 deletions(-)
diff --git a/docs/features.txt b/docs/features.txt
index d707f01..1e145e1 100644
--- a/docs/features.txt
+++ b/docs/features.txt
@@ -288,21 +288,21 @@ Khronos, ARB, and OES extensions that are not part of any OpenGL or OpenGL ES ve
   GL_ARB_parallel_shader_compile                        not started, but Chia-I Wu did some related work in 2014
   GL_ARB_pipeline_statistics_query                      DONE (i965, nvc0, radeonsi, softpipe, swr)
   GL_ARB_post_depth_coverage                            DONE (i965)
   GL_ARB_robustness_isolation                           not started
   GL_ARB_sample_locations                               not started
   GL_ARB_seamless_cubemap_per_texture                   DONE (i965, nvc0, radeonsi, r600, softpipe, swr)
   GL_ARB_shader_atomic_counter_ops                      DONE (i965/gen7+, nvc0, radeonsi, softpipe)
   GL_ARB_shader_ballot                                  not started
   GL_ARB_shader_clock                                   DONE (i965/gen7+, radeonsi)
   GL_ARB_shader_draw_parameters                         DONE (i965, nvc0, radeonsi)
-  GL_ARB_shader_group_vote                              DONE (nvc0)
+  GL_ARB_shader_group_vote                              DONE (nvc0, radeonsi)
   GL_ARB_shader_stencil_export                          DONE (i965/gen9+, radeonsi, softpipe, llvmpipe, swr)
   GL_ARB_shader_viewport_layer_array                    DONE (i965/gen6+)
   GL_ARB_sparse_buffer                                  not started
   GL_ARB_sparse_texture                                 not started
   GL_ARB_sparse_texture2                                not started
   GL_ARB_sparse_texture_clamp                           not started
   GL_ARB_texture_filter_minmax                          not started
   GL_ARB_transform_feedback_overflow_query              DONE (i965/gen6+)
   GL_KHR_blend_equation_advanced_coherent               DONE (i965/gen9+)
   GL_KHR_no_error                                       not started
diff --git a/docs/relnotes/17.1.0.html b/docs/relnotes/17.1.0.html
index 52b35b5..38bc1e8 100644
--- a/docs/relnotes/17.1.0.html
+++ b/docs/relnotes/17.1.0.html
@@ -39,20 +39,21 @@ TBD.
 
 <h2>New features</h2>
 
 <p>
 Note: some of the new features are only available with certain drivers.
</p> <ul> <li>GL_ARB_gpu_shader_int64 on i965/gen8+, nvc0, radeonsi, softpipe, llvmpipe</li> <li>GL_ARB_shader_clock on radeonsi</li> +<li>GL_ARB_shader_group_vote on radeonsi</li> <li>GL_ARB_transform_feedback2 on i965/gen6</li> <li>GL_ARB_transform_feedback_overflow_query on i965/gen6+</li> <li>Geometry shaders enabled on swr</li> </ul> <h2>Bug fixes</h2> <ul> </ul> diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 6944c7c..688900e 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -417,20 +417,23 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_STREAM_OUTPUT_INTERLEAVE_BUFFERS: case PIPE_CAP_DOUBLES: case PIPE_CAP_TGSI_TEX_TXF_LZ: return 1; case PIPE_CAP_INT64: case PIPE_CAP_INT64_DIVMOD: case PIPE_CAP_TGSI_CLOCK: return HAVE_LLVM >= 0x0309; + case PIPE_CAP_TGSI_VOTE: + return HAVE_LLVM >= 0x0400; + case PIPE_CAP_RESOURCE_FROM_USER_MEMORY: return !SI_BIG_ENDIAN && sscreen->b.info.has_userptr; case PIPE_CAP_DEVICE_RESET_STATUS_QUERY: return (sscreen->b.info.drm_major == 2 && sscreen->b.info.drm_minor >= 43) || sscreen->b.info.drm_major == 3; case PIPE_CAP_TEXTURE_MULTISAMPLE: /* 2D tiling on CIK is supported since DRM 2.35.0 */ @@ -471,21 +474,20 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param) /* Unsupported features. 
*/ case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY: case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS: case PIPE_CAP_USER_VERTEX_BUFFERS: case PIPE_CAP_FAKE_SW_MSAA: case PIPE_CAP_TEXTURE_GATHER_OFFSETS: case PIPE_CAP_VERTEXID_NOBASE: case PIPE_CAP_PRIMITIVE_RESTART_FOR_PATCHES: - case PIPE_CAP_TGSI_VOTE: case PIPE_CAP_MAX_WINDOW_RECTANGLES: case PIPE_CAP_NATIVE_FENCE_FD: case PIPE_CAP_TGSI_FS_FBFETCH: case PIPE_CAP_TGSI_MUL_ZERO_WINS: case PIPE_CAP_UMA: return 0; case PIPE_CAP_QUERY_BUFFER_OBJECT: return si_have_tgsi_compute(sscreen); diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 415d13b..737d005 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -5058,20 +5058,98 @@ static void build_interp_intrinsic(const struct lp_build_tgsi_action *action, llvm_chan, attr_number, params, i, j); } else { emit_data->output[chan] = ac_build_fs_interp_mov(&ctx->ac, lp_build_const_int32(gallivm, 2), /* P0 */ llvm_chan, attr_number, params); } } } +static LLVMValueRef si_emit_ballot(struct si_shader_context *ctx, + LLVMValueRef value) +{ + struct gallivm_state *gallivm = &ctx->gallivm; + LLVMValueRef args[3] = { + value, + ctx->i32_0, + LLVMConstInt(ctx->i32, LLVMIntNE, 0) + }; + + if (LLVMTypeOf(value) != ctx->i32) + args[0] = LLVMBuildBitCast(gallivm->builder, value, ctx->i32, ""); + + return lp_build_intrinsic(gallivm->builder, + "llvm.amdgcn.icmp.i32", + ctx->i64, args, 3, + LP_FUNC_ATTR_NOUNWIND | + LP_FUNC_ATTR_READNONE | + LP_FUNC_ATTR_CONVERGENT); +} + +static void vote_all_emit( + const struct lp_build_tgsi_action *action, + struct lp_build_tgsi_context *bld_base, + struct lp_build_emit_data *emit_data) +{ + struct si_shader_context *ctx = si_shader_context(bld_base); + struct gallivm_state *gallivm = &ctx->gallivm; + LLVMValueRef active_set, vote_set; + LLVMValueRef tmp; + + active_set = si_emit_ballot(ctx, ctx->i32_1); + vote_set 
= si_emit_ballot(ctx, emit_data->args[0]); + + tmp = LLVMBuildICmp(gallivm->builder, LLVMIntEQ, vote_set, active_set, ""); + emit_data->output[emit_data->chan] = + LLVMBuildSExt(gallivm->builder, tmp, ctx->i32, ""); +} + +static void vote_any_emit( + const struct lp_build_tgsi_action *action, + struct lp_build_tgsi_context *bld_base, + struct lp_build_emit_data *emit_data) +{ + struct si_shader_context *ctx = si_shader_context(bld_base); + struct gallivm_state *gallivm = &ctx->gallivm; + LLVMValueRef vote_set; + LLVMValueRef tmp; + + vote_set = si_emit_ballot(ctx, emit_data->args[0]); + + tmp = LLVMBuildICmp(gallivm->builder, LLVMIntNE, + vote_set, LLVMConstInt(ctx->i64, 0, 0), ""); + emit_data->output[emit_data->chan] = + LLVMBuildSExt(gallivm->builder, tmp, ctx->i32, ""); +} + +static void vote_eq_emit( + const struct lp_build_tgsi_action *action, + struct lp_build_tgsi_context *bld_base, + struct lp_build_emit_data *emit_data) +{ + struct si_shader_context *ctx = si_shader_context(bld_base); + struct gallivm_state *gallivm = &ctx->gallivm; + LLVMValueRef active_set, vote_set; + LLVMValueRef all, none, tmp; + + active_set = si_emit_ballot(ctx, ctx->i32_1); + vote_set = si_emit_ballot(ctx, emit_data->args[0]); + + all = LLVMBuildICmp(gallivm->builder, LLVMIntEQ, vote_set, active_set, ""); + none = LLVMBuildICmp(gallivm->builder, LLVMIntEQ, + vote_set, LLVMConstInt(ctx->i64, 0, 0), ""); + tmp = LLVMBuildOr(gallivm->builder, all, none, ""); + emit_data->output[emit_data->chan] = + LLVMBuildSExt(gallivm->builder, tmp, ctx->i32, ""); +} + static unsigned si_llvm_get_stream(struct lp_build_tgsi_context *bld_base, struct lp_build_emit_data *emit_data) { struct si_shader_context *ctx = si_shader_context(bld_base); struct tgsi_src_register src0 = emit_data->inst->Src[0].Register; LLVMValueRef imm; unsigned stream; assert(src0.File == TGSI_FILE_IMMEDIATE); @@ -6501,20 +6579,24 @@ static void si_init_shader_ctx(struct si_shader_context *ctx, 
bld_base->op_actions[TGSI_OPCODE_MEMBAR].emit = membar_emit; bld_base->op_actions[TGSI_OPCODE_CLOCK].emit = clock_emit; bld_base->op_actions[TGSI_OPCODE_DDX].emit = si_llvm_emit_ddxy; bld_base->op_actions[TGSI_OPCODE_DDY].emit = si_llvm_emit_ddxy; bld_base->op_actions[TGSI_OPCODE_DDX_FINE].emit = si_llvm_emit_ddxy; bld_base->op_actions[TGSI_OPCODE_DDY_FINE].emit = si_llvm_emit_ddxy; + bld_base->op_actions[TGSI_OPCODE_VOTE_ALL].emit = vote_all_emit; + bld_base->op_actions[TGSI_OPCODE_VOTE_ANY].emit = vote_any_emit; + bld_base->op_actions[TGSI_OPCODE_VOTE_EQ].emit = vote_eq_emit; + bld_base->op_actions[TGSI_OPCODE_EMIT].emit = si_llvm_emit_vertex; bld_base->op_actions[TGSI_OPCODE_ENDPRIM].emit = si_llvm_emit_primitive; bld_base->op_actions[TGSI_OPCODE_BARRIER].emit = si_llvm_emit_barrier; } #define EXP_TARGET (HAVE_LLVM >= 0x0500 ? 0 : 3) #define EXP_OUT0 (HAVE_LLVM >= 0x0500 ? 2 : 5) /* Return true if the PARAM export has been eliminated. */ static bool si_eliminate_const_output(struct si_shader_context *ctx, -- 2.9.3 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev