From: Dave Airlie <airl...@redhat.com>

Noticed while looking at Sascha Willems' deferred shaders.

This is a bit of an LLVM workaround; LLVM was producing this:
        v_cvt_pkrtz_f16_f32_e64 v4, v7, v8                       ; D2960004 00021107
        v_cvt_pkrtz_f16_f32_e64 v6, v9, 1.0                      ; D2960006 0001E509
        s_waitcnt vmcnt(0)                                       ; BF8C0F70
        exp mrt0 v4, v4, v6, v6 compr                            ; C400040F 00000604
        s_waitcnt expcnt(0)                                      ; BF8C0F0F
        v_cvt_pkrtz_f16_f32_e64 v4, v12, v5                      ; D2960004 00020B0C
        v_cvt_pkrtz_f16_f32_e64 v5, v14, 1.0                     ; D2960005 0001E50E
        exp mrt1 v4, v4, v5, v5 compr                            ; C400041F 00000504
        s_waitcnt expcnt(0)                                      ; BF8C0F0F
        v_cvt_pkrtz_f16_f32_e64 v0, v0, v1                       ; D2960000 00020300
        v_cvt_pkrtz_f16_f32_e64 v1, v2, v3                       ; D2960001 00020702
        exp mrt2 v0, v0, v1, v1 done compr vm                    ; C4001C2F 00000100

After this change:
        v_cvt_pkrtz_f16_f32_e64 v4, v7, v8                       ; D2960004 00021107
        s_waitcnt vmcnt(0)                                       ; BF8C0F70
        v_cvt_pkrtz_f16_f32_e64 v0, v0, v1                       ; D2960000 00020300
        v_cvt_pkrtz_f16_f32_e64 v6, v9, 1.0                      ; D2960006 0001E509
        v_cvt_pkrtz_f16_f32_e64 v5, v12, v5                      ; D2960005 00020B0C
        v_cvt_pkrtz_f16_f32_e64 v7, v14, 1.0                     ; D2960007 0001E50E
        exp mrt0 v4, v4, v6, v6 compr                            ; C400040F 00000604
        v_cvt_pkrtz_f16_f32_e64 v1, v2, v3                       ; D2960001 00020702
        exp mrt1 v5, v5, v7, v7 compr                            ; C400041F 00000705
        exp mrt2 v0, v0, v1, v1 done compr vm                    ; C4001C2F 00000100

With this change, no s_waitcnt expcnt instructions are emitted between the exports.

Signed-off-by: Dave Airlie <airl...@redhat.com>
---
 src/amd/common/ac_nir_to_llvm.c | 34 +++++++++++++++++++---------------
 1 file changed, 19 insertions(+), 15 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 38d5359..4d2e469 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -5572,24 +5572,22 @@ handle_tcs_outputs_post(struct nir_to_llvm_context *ctx)
        write_tess_factors(ctx);
 }
 
-static void
+static bool
 si_export_mrt_color(struct nir_to_llvm_context *ctx,
-                   LLVMValueRef *color, unsigned param, bool is_last)
+                   LLVMValueRef *color, unsigned param, bool is_last,
+                   struct ac_export_args *args)
 {
-
-       struct ac_export_args args;
-
        /* Export */
        si_llvm_init_export_args(ctx, color, param,
-                                &args);
+                                args);
 
        if (is_last) {
-               args.valid_mask = 1; /* whether the EXEC mask is valid */
-               args.done = 1; /* DONE bit */
-       } else if (!args.enabled_channels)
-               return; /* unnecessary NULL export */
+               args->valid_mask = 1; /* whether the EXEC mask is valid */
+               args->done = 1; /* DONE bit */
+       } else if (!args->enabled_channels)
+               return false; /* unnecessary NULL export */
 
-       ac_build_export(&ctx->ac, &args);
+       return true;
 }
 
 static void
@@ -5639,6 +5637,7 @@ handle_fs_outputs_post(struct nir_to_llvm_context *ctx)
 {
        unsigned index = 0;
        LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL;
+       struct ac_export_args color_args[8];
 
        for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
                LLVMValueRef values[4];
@@ -5667,15 +5666,20 @@ handle_fs_outputs_post(struct nir_to_llvm_context *ctx)
                        if (!ctx->shader_info->fs.writes_z && 
!ctx->shader_info->fs.writes_stencil && 
!ctx->shader_info->fs.writes_sample_mask)
                                last = ctx->output_mask <= ((1ull << (i + 1)) - 
1);
 
-                       si_export_mrt_color(ctx, values, V_008DFC_SQ_EXP_MRT + 
index, last);
-                       index++;
+                       bool ret = si_export_mrt_color(ctx, values, 
V_008DFC_SQ_EXP_MRT + index, last, &color_args[index]);
+                       if (ret)
+                               index++;
                }
        }
 
+       for (unsigned i = 0; i < index; i++)
+               ac_build_export(&ctx->ac, &color_args[i]);
        if (depth || stencil || samplemask)
                si_export_mrt_z(ctx, depth, stencil, samplemask);
-       else if (!index)
-               si_export_mrt_color(ctx, NULL, V_008DFC_SQ_EXP_NULL, true);
+       else if (!index) {
+               si_export_mrt_color(ctx, NULL, V_008DFC_SQ_EXP_NULL, true, 
&color_args[0]);
+               ac_build_export(&ctx->ac, &color_args[0]);
+       }
 
        ctx->shader_info->fs.output_mask = index ? ((1ull << index) - 1) : 0;
 }
-- 
2.7.4

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to