From: Tom Stellard <thomas.stell...@amd.com>

The TGSI->LLVM pass for radeonsi preloads constants and relies on LLVM's
sinking pass to reduce SGPR usage by lowering constant reads to an
optimal place in the code.  However, LLVM's machine sink pass will not
lower instructions that have been selected from llvm.SI.load.const
intrinsics, because these instructions do not have a MachineMemOperand,
which LLVM needs in order to determine whether or not it is safe to sink
a load.  Replacing this intrinsic with a real load instruction will
enable the sinking optimization and probably a few others.

The other advantages of using pointers are:
+ Reduced register usage (pointers take 2 registers, descriptors take 4)
+ More code sharing with compute

This should also fix some crashes due to the compiler running out of
registers like in this bug:

https://bugs.freedesktop.org/show_bug.cgi?id=66805
---
 src/gallium/drivers/radeonsi/radeonsi_shader.c | 26 ++++++++++++++++++++------
 src/gallium/drivers/radeonsi/si_state_draw.c   |  4 ++++
 2 files changed, 24 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/radeonsi_shader.c b/src/gallium/drivers/radeonsi/radeonsi_shader.c
index 4d8a479..eb63fc9 100644
--- a/src/gallium/drivers/radeonsi/radeonsi_shader.c
+++ b/src/gallium/drivers/radeonsi/radeonsi_shader.c
@@ -115,20 +115,26 @@ static LLVMValueRef build_indexed_load(
        return result;
 }
 
-static LLVMValueRef build_constant_load(
+static LLVMValueRef build_load_constant(
        struct si_shader_context * si_shader_ctx,
        LLVMValueRef base_ptr,
        LLVMValueRef offset)
 {
        struct lp_build_context * base =
                                &si_shader_ctx->radeon_bld.soa.bld_base.base;
+#if HAVE_LLVM <= 0x0303
        LLVMValueRef args[2];
        args[0] = base_ptr;
        args[1] = offset;
        return build_intrinsic(base->gallivm->builder, "llvm.SI.load.const",
                                base->elem_type, args, 2,
                                LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
-
+#else
+       LLVMValueRef dword_offset = LLVMBuildUDiv(base->gallivm->builder,
+                               offset,
+                               lp_build_const_int32(base->gallivm, 4), "");
+       return build_indexed_load(si_shader_ctx, base_ptr, dword_offset);
+#endif
 }
 
 static LLVMValueRef get_instance_index(
@@ -450,7 +456,7 @@ static LLVMValueRef fetch_constant(
        addr = lp_build_mul_imm(&bld_base->uint_bld, addr, 16);
        args[1] = lp_build_add(&bld_base->uint_bld, addr, args[1]);
 
-       result = build_constant_load(si_shader_ctx, args[0], args[1]);
+       result = build_load_constant(si_shader_ctx, args[0], args[1]);
 
        return bitcast(bld_base, type, result);
 }
@@ -609,7 +615,7 @@ static void si_llvm_emit_clipvertex(struct lp_build_tgsi_context * bld_base,
                                args[1] = lp_build_const_int32(base->gallivm,
                                                               ((reg_index * 4 + chan) * 4 +
                                                                const_chan) * 4);
-                               base_elt = build_constant_load(si_shader_ctx, args[0], args[1]);
+                               base_elt = build_load_constant(si_shader_ctx, args[0], args[1]);
                                args[5 + chan] =
                                        lp_build_add(base, args[5 + chan],
                                                     lp_build_mul(base, base_elt,
@@ -1215,8 +1221,16 @@ static void create_function(struct si_shader_context *si_shader_ctx)
        v2i32 = LLVMVectorType(i32, 2);
        v3i32 = LLVMVectorType(i32, 3);
 
-       params[SI_PARAM_CONST] = LLVMPointerType(LLVMVectorType(i8, 16), CONST_ADDR_SPACE);
+#if HAVE_LLVM <= 0x0303
+       params[SI_PARAM_CONST] = LLVMPointerType(LLVMVectorType(i8, 16),
+                                                       CONST_ADDR_SPACE);
        params[SI_PARAM_SAMPLER] = params[SI_PARAM_CONST];
+#else
+       params[SI_PARAM_CONST] = LLVMPointerType(LLVMPointerType(f32,
+                                CONST_ADDR_SPACE), CONST_ADDR_SPACE);
+       params[SI_PARAM_SAMPLER] = LLVMPointerType(LLVMVectorType(i8, 16),
+                                                       CONST_ADDR_SPACE);
+#endif
        params[SI_PARAM_RESOURCE] = LLVMPointerType(LLVMVectorType(i8, 32), CONST_ADDR_SPACE);
 
        if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX) {
@@ -1298,7 +1312,7 @@ static void preload_constants(struct si_shader_context *si_shader_ctx)
                        si_shader_ctx->const_resource,
                        lp_build_const_int32(gallivm, i * 4)
                };
-               si_shader_ctx->constants[i] = build_constant_load(si_shader_ctx,
+               si_shader_ctx->constants[i] = build_load_constant(si_shader_ctx,
                                                        args[0], args[1]);
        }
 }
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 29d960d..efbee0d 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -494,6 +494,7 @@ static void si_constant_buffer_update(struct r600_context *rctx)
                                si_pm4_sh_data_add(pm4, va);
                                si_pm4_sh_data_add(pm4, (S_008F04_BASE_ADDRESS_HI(va >> 32) |
                                                         S_008F04_STRIDE(0)));
+#if HAVE_LLVM <= 0x0303
                                si_pm4_sh_data_add(pm4, cb->buffer_size);
                                si_pm4_sh_data_add(pm4, S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
                                                   S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
@@ -501,12 +502,15 @@ static void si_constant_buffer_update(struct r600_context *rctx)
                                                   S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
                                                   S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
                                                   S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32));
+#endif
                        } else {
                                /* Fill in an empty T# buffer resource description */
                                si_pm4_sh_data_add(pm4, 0);
                                si_pm4_sh_data_add(pm4, 0);
+#if HAVE_LLVM <= 0x0303
                                si_pm4_sh_data_add(pm4, 0);
                                si_pm4_sh_data_add(pm4, 0);
+#endif
                        }
                }
 
-- 
1.8.1.5

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to