On 10.06.2017 02:26, Connor Abbott wrote:
On Fri, Jun 9, 2017 at 5:04 PM, Bas Nieuwenhuizen
<b...@basnieuwenhuizen.nl> wrote:
On Sat, Jun 10, 2017 at 1:50 AM, Connor Abbott
<conn...@valvesoftware.com> wrote:
From: Connor Abbott <cwabbo...@gmail.com>

Signed-off-by: Connor Abbott <cwabbo...@gmail.com>
  src/amd/common/ac_nir_to_llvm.c | 75 +++++++++++++++++++++++++++++++++++++++++
  src/amd/vulkan/radv_device.c    |  8 +++++
  src/amd/vulkan/radv_pipeline.c  |  2 ++
  3 files changed, 85 insertions(+)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 5bbd1c5..111e575 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -4069,6 +4069,81 @@ static void visit_intrinsic(struct nir_to_llvm_context 
         case nir_intrinsic_load_patch_vertices_in:
                 result = LLVMConstInt(ctx->i32, 
ctx->options->key.tcs.input_vertices, false);
+       case nir_intrinsic_ballot:
+               result = ac_build_ballot(&ctx->ac, get_src(ctx, instr->src[0]));
+               break;
+       case nir_intrinsic_read_first_invocation: {
+               LLVMValueRef src0 = get_src(ctx, instr->src[0]);
+               ac_build_optimization_barrier(&ctx->ac, &src0);
+               LLVMValueRef srcs[1] = { src0 };
+               result = ac_build_intrinsic(&ctx->ac, 
+                                           ctx->i32, srcs, 1,
+                                           AC_FUNC_ATTR_NOUNWIND |
+                                           AC_FUNC_ATTR_READNONE |
+                                           AC_FUNC_ATTR_CONVERGENT);
+               break;
+        }
+       case nir_intrinsic_read_invocation: {
+               LLVMValueRef src0 = get_src(ctx, instr->src[0]);
+               ac_build_optimization_barrier(&ctx->ac, &src0);
+               LLVMValueRef srcs[2] = { src0, get_src(ctx, instr->src[1]) };
+               result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.readlane",
+                                           ctx->i32, srcs, 2,
+                                           AC_FUNC_ATTR_NOUNWIND |
+                                           AC_FUNC_ATTR_READNONE |
+                                           AC_FUNC_ATTR_CONVERGENT);
+               break;
+        }
+       case nir_intrinsic_load_subgroup_invocation:
+               result = ac_get_thread_id(&ctx->ac);
+               break;
+       case nir_intrinsic_load_subgroup_size:
+               result = LLVMConstInt(ctx->i32, 64, 0);
+               break;
+       case nir_intrinsic_all_invocations:
+               result = LLVMBuildSExt(ctx->builder,
+                                      ac_build_vote_all(&ctx->ac,
+                                                        get_src(ctx, 
+                                      ctx->i32, "");

How well does LLVM optimize this? I've always found the boolean as
int32 with -1 and 0 an awkward mapping to LLVM, and am wondering
whether LLVM is able to optimize the SExt away or if a select might be
better.
From looking at the shader dump of my test, LLVM seems to be able to
optimize it away. In fact, it's what radeonsi uses for all their
comparisons (since TGSI also uses -1 for true), so I'd expect it to be
at least as good as a select; it might be better.

It might be interesting to make booleans have a bit-size of 1, like in
LLVM... it would probably require a lot of churn, though.

If NIR ever allows 1-bit channels, it'd make sense. Apart from that, it's just not important, because InstCombine seems to be able to optimize it all away.


+               break;
+       case nir_intrinsic_any_invocations:
+               result = LLVMBuildSExt(ctx->builder,
+                                      ac_build_vote_any(&ctx->ac,
+                                                        get_src(ctx, 
+                                      ctx->i32, "");
+               break;
+       case nir_intrinsic_all_invocations_equal:
+               result = LLVMBuildSExt(ctx->builder,
+                                      ac_build_vote_eq(&ctx->ac,
+                                                        get_src(ctx, 
+                                      ctx->i32, "");
+               break;
+       case nir_intrinsic_load_subgroup_eq_mask: {
+               LLVMValueRef id = ac_get_thread_id(&ctx->ac);
+               id = LLVMBuildZExt(ctx->builder, id, ctx->i64, "");
+               result = LLVMBuildShl(ctx->builder, LLVMConstInt(ctx->i64, 1, 0), id, 
+               break;
+       }
+       case nir_intrinsic_load_subgroup_ge_mask:
+       case nir_intrinsic_load_subgroup_gt_mask:
+       case nir_intrinsic_load_subgroup_le_mask:
+       case nir_intrinsic_load_subgroup_lt_mask: {
+               LLVMValueRef id = ac_get_thread_id(&ctx->ac);
+               if (instr->intrinsic == nir_intrinsic_load_subgroup_gt_mask ||
+                   instr->intrinsic == nir_intrinsic_load_subgroup_le_mask) {
+                       /* All bits set except LSB */
+                       result = LLVMConstInt(ctx->i64, -2, 0);
+               } else {
+                       /* All bits set */
+                       result = LLVMConstInt(ctx->i64, -1, 0);
+               }
+               id = LLVMBuildZExt(ctx->builder, id, ctx->i64, "");
+               result = LLVMBuildShl(ctx->builder, result, id, "");
+               if (instr->intrinsic == nir_intrinsic_load_subgroup_le_mask ||
+                   instr->intrinsic == nir_intrinsic_load_subgroup_lt_mask)
+                       result = LLVMBuildNot(ctx->builder, result, "");
+               break;
+       }
                 fprintf(stderr, "Unknown intrinsic: ");
                 nir_print_instr(&instr->instr, stderr);
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index e9bf44c..ea50acc 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -127,6 +127,14 @@ static const VkExtensionProperties 
common_device_extensions[] = {
                 .extensionName = VK_NV_DEDICATED_ALLOCATION_EXTENSION_NAME,
                 .specVersion = 1,
+       {
+               .extensionName = VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME,
+               .specVersion = 1,
+       },
+       {
+               .extensionName = VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME,
+               .specVersion = 1,
+       },

  static VkResult
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index 39cbd5a..242890a 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -228,6 +228,8 @@ radv_shader_compile_to_nir(struct radv_device *device,
                         .image_write_without_format = true,
                         .tessellation = true,
                         .int64 = true,
+                       .shader_ballot = true,
+                       .shader_group_vote = true,
                 entry_point = spirv_to_nir(spirv, module->size / 4,
                                            spec_entries, num_spec_entries,

mesa-dev mailing list
mesa-dev mailing list
mesa-dev mailing list

Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
mesa-dev mailing list

Reply via email to