From: Nicolai Hähnle <nicolai.haeh...@amd.com>

---
 docs/features.txt                        |  2 +-
 docs/relnotes/17.1.0.html                |  1 +
 src/gallium/drivers/radeonsi/si_pipe.c   |  4 +-
 src/gallium/drivers/radeonsi/si_shader.c | 82 ++++++++++++++++++++++++++++++++
 4 files changed, 87 insertions(+), 2 deletions(-)

diff --git a/docs/features.txt b/docs/features.txt
index d707f01..1e145e1 100644
--- a/docs/features.txt
+++ b/docs/features.txt
@@ -288,21 +288,21 @@ Khronos, ARB, and OES extensions that are not part of any OpenGL or OpenGL ES ve
   GL_ARB_parallel_shader_compile                        not started, but Chia-I Wu did some related work in 2014
   GL_ARB_pipeline_statistics_query                      DONE (i965, nvc0, radeonsi, softpipe, swr)
   GL_ARB_post_depth_coverage                            DONE (i965)
   GL_ARB_robustness_isolation                           not started
   GL_ARB_sample_locations                               not started
   GL_ARB_seamless_cubemap_per_texture                   DONE (i965, nvc0, radeonsi, r600, softpipe, swr)
   GL_ARB_shader_atomic_counter_ops                      DONE (i965/gen7+, nvc0, radeonsi, softpipe)
   GL_ARB_shader_ballot                                  not started
   GL_ARB_shader_clock                                   DONE (i965/gen7+, radeonsi)
   GL_ARB_shader_draw_parameters                         DONE (i965, nvc0, radeonsi)
-  GL_ARB_shader_group_vote                              DONE (nvc0)
+  GL_ARB_shader_group_vote                              DONE (nvc0, radeonsi)
   GL_ARB_shader_stencil_export                          DONE (i965/gen9+, radeonsi, softpipe, llvmpipe, swr)
   GL_ARB_shader_viewport_layer_array                    DONE (i965/gen6+)
   GL_ARB_sparse_buffer                                  not started
   GL_ARB_sparse_texture                                 not started
   GL_ARB_sparse_texture2                                not started
   GL_ARB_sparse_texture_clamp                           not started
   GL_ARB_texture_filter_minmax                          not started
   GL_ARB_transform_feedback_overflow_query              DONE (i965/gen6+)
   GL_KHR_blend_equation_advanced_coherent               DONE (i965/gen9+)
   GL_KHR_no_error                                       not started
diff --git a/docs/relnotes/17.1.0.html b/docs/relnotes/17.1.0.html
index 52b35b5..38bc1e8 100644
--- a/docs/relnotes/17.1.0.html
+++ b/docs/relnotes/17.1.0.html
@@ -39,20 +39,21 @@ TBD.
 
 <h2>New features</h2>
 
 <p>
 Note: some of the new features are only available with certain drivers.
 </p>
 
 <ul>
 <li>GL_ARB_gpu_shader_int64 on i965/gen8+, nvc0, radeonsi, softpipe, llvmpipe</li>
 <li>GL_ARB_shader_clock on radeonsi</li>
+<li>GL_ARB_shader_group_vote on radeonsi</li>
 <li>GL_ARB_transform_feedback2 on i965/gen6</li>
 <li>GL_ARB_transform_feedback_overflow_query on i965/gen6+</li>
 <li>Geometry shaders enabled on swr</li>
 </ul>
 
 <h2>Bug fixes</h2>
 
 <ul>
 </ul>
 
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 6944c7c..688900e 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -417,20 +417,23 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
        case PIPE_CAP_STREAM_OUTPUT_INTERLEAVE_BUFFERS:
        case PIPE_CAP_DOUBLES:
        case PIPE_CAP_TGSI_TEX_TXF_LZ:
                return 1;
 
        case PIPE_CAP_INT64:
        case PIPE_CAP_INT64_DIVMOD:
        case PIPE_CAP_TGSI_CLOCK:
                return HAVE_LLVM >= 0x0309;
 
+       case PIPE_CAP_TGSI_VOTE:
+               return HAVE_LLVM >= 0x0400;
+
        case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
                return !SI_BIG_ENDIAN && sscreen->b.info.has_userptr;
 
        case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
                return (sscreen->b.info.drm_major == 2 &&
                        sscreen->b.info.drm_minor >= 43) ||
                       sscreen->b.info.drm_major == 3;
 
        case PIPE_CAP_TEXTURE_MULTISAMPLE:
                /* 2D tiling on CIK is supported since DRM 2.35.0 */
@@ -471,21 +474,20 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
 
        /* Unsupported features. */
        case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
        case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
        case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS:
        case PIPE_CAP_USER_VERTEX_BUFFERS:
        case PIPE_CAP_FAKE_SW_MSAA:
        case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
        case PIPE_CAP_VERTEXID_NOBASE:
        case PIPE_CAP_PRIMITIVE_RESTART_FOR_PATCHES:
-       case PIPE_CAP_TGSI_VOTE:
        case PIPE_CAP_MAX_WINDOW_RECTANGLES:
        case PIPE_CAP_NATIVE_FENCE_FD:
        case PIPE_CAP_TGSI_FS_FBFETCH:
        case PIPE_CAP_TGSI_MUL_ZERO_WINS:
        case PIPE_CAP_UMA:
                return 0;
 
        case PIPE_CAP_QUERY_BUFFER_OBJECT:
                return si_have_tgsi_compute(sscreen);
 
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 415d13b..737d005 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -5058,20 +5058,98 @@ static void build_interp_intrinsic(const struct lp_build_tgsi_action *action,
                                llvm_chan, attr_number, params,
                                i, j);
                } else {
                        emit_data->output[chan] = ac_build_fs_interp_mov(&ctx->ac,
                                lp_build_const_int32(gallivm, 2), /* P0 */
                                llvm_chan, attr_number, params);
                }
        }
 }
 
+static LLVMValueRef si_emit_ballot(struct si_shader_context *ctx,
+                                  LLVMValueRef value)
+{
+       struct gallivm_state *gallivm = &ctx->gallivm;
+       LLVMValueRef args[3] = {
+               value,
+               ctx->i32_0,
+               LLVMConstInt(ctx->i32, LLVMIntNE, 0)
+       };
+
+       if (LLVMTypeOf(value) != ctx->i32)
+               args[0] = LLVMBuildBitCast(gallivm->builder, value, ctx->i32, "");
+
+       return lp_build_intrinsic(gallivm->builder,
+                                 "llvm.amdgcn.icmp.i32",
+                                 ctx->i64, args, 3,
+                                 LP_FUNC_ATTR_NOUNWIND |
+                                 LP_FUNC_ATTR_READNONE |
+                                 LP_FUNC_ATTR_CONVERGENT);
+}
+
+static void vote_all_emit(
+       const struct lp_build_tgsi_action *action,
+       struct lp_build_tgsi_context *bld_base,
+       struct lp_build_emit_data *emit_data)
+{
+       struct si_shader_context *ctx = si_shader_context(bld_base);
+       struct gallivm_state *gallivm = &ctx->gallivm;
+       LLVMValueRef active_set, vote_set;
+       LLVMValueRef tmp;
+
+       active_set = si_emit_ballot(ctx, ctx->i32_1);
+       vote_set = si_emit_ballot(ctx, emit_data->args[0]);
+
+       tmp = LLVMBuildICmp(gallivm->builder, LLVMIntEQ, vote_set, active_set, "");
+       emit_data->output[emit_data->chan] =
+               LLVMBuildSExt(gallivm->builder, tmp, ctx->i32, "");
+}
+
+static void vote_any_emit(
+       const struct lp_build_tgsi_action *action,
+       struct lp_build_tgsi_context *bld_base,
+       struct lp_build_emit_data *emit_data)
+{
+       struct si_shader_context *ctx = si_shader_context(bld_base);
+       struct gallivm_state *gallivm = &ctx->gallivm;
+       LLVMValueRef vote_set;
+       LLVMValueRef tmp;
+
+       vote_set = si_emit_ballot(ctx, emit_data->args[0]);
+
+       tmp = LLVMBuildICmp(gallivm->builder, LLVMIntNE,
+                           vote_set, LLVMConstInt(ctx->i64, 0, 0), "");
+       emit_data->output[emit_data->chan] =
+               LLVMBuildSExt(gallivm->builder, tmp, ctx->i32, "");
+}
+
+static void vote_eq_emit(
+       const struct lp_build_tgsi_action *action,
+       struct lp_build_tgsi_context *bld_base,
+       struct lp_build_emit_data *emit_data)
+{
+       struct si_shader_context *ctx = si_shader_context(bld_base);
+       struct gallivm_state *gallivm = &ctx->gallivm;
+       LLVMValueRef active_set, vote_set;
+       LLVMValueRef all, none, tmp;
+
+       active_set = si_emit_ballot(ctx, ctx->i32_1);
+       vote_set = si_emit_ballot(ctx, emit_data->args[0]);
+
+       all = LLVMBuildICmp(gallivm->builder, LLVMIntEQ, vote_set, active_set, "");
+       none = LLVMBuildICmp(gallivm->builder, LLVMIntEQ,
+                            vote_set, LLVMConstInt(ctx->i64, 0, 0), "");
+       tmp = LLVMBuildOr(gallivm->builder, all, none, "");
+       emit_data->output[emit_data->chan] =
+               LLVMBuildSExt(gallivm->builder, tmp, ctx->i32, "");
+}
+
 static unsigned si_llvm_get_stream(struct lp_build_tgsi_context *bld_base,
                                       struct lp_build_emit_data *emit_data)
 {
        struct si_shader_context *ctx = si_shader_context(bld_base);
        struct tgsi_src_register src0 = emit_data->inst->Src[0].Register;
        LLVMValueRef imm;
        unsigned stream;
 
        assert(src0.File == TGSI_FILE_IMMEDIATE);
 
@@ -6501,20 +6579,24 @@ static void si_init_shader_ctx(struct si_shader_context *ctx,
 
        bld_base->op_actions[TGSI_OPCODE_MEMBAR].emit = membar_emit;
 
        bld_base->op_actions[TGSI_OPCODE_CLOCK].emit = clock_emit;
 
        bld_base->op_actions[TGSI_OPCODE_DDX].emit = si_llvm_emit_ddxy;
        bld_base->op_actions[TGSI_OPCODE_DDY].emit = si_llvm_emit_ddxy;
        bld_base->op_actions[TGSI_OPCODE_DDX_FINE].emit = si_llvm_emit_ddxy;
        bld_base->op_actions[TGSI_OPCODE_DDY_FINE].emit = si_llvm_emit_ddxy;
 
+       bld_base->op_actions[TGSI_OPCODE_VOTE_ALL].emit = vote_all_emit;
+       bld_base->op_actions[TGSI_OPCODE_VOTE_ANY].emit = vote_any_emit;
+       bld_base->op_actions[TGSI_OPCODE_VOTE_EQ].emit = vote_eq_emit;
+
        bld_base->op_actions[TGSI_OPCODE_EMIT].emit = si_llvm_emit_vertex;
        bld_base->op_actions[TGSI_OPCODE_ENDPRIM].emit = si_llvm_emit_primitive;
        bld_base->op_actions[TGSI_OPCODE_BARRIER].emit = si_llvm_emit_barrier;
 }
 
 #define EXP_TARGET (HAVE_LLVM >= 0x0500 ? 0 : 3)
 #define EXP_OUT0 (HAVE_LLVM >= 0x0500 ? 2 : 5)
 
 /* Return true if the PARAM export has been eliminated. */
 static bool si_eliminate_const_output(struct si_shader_context *ctx,
-- 
2.9.3

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to