Advantages of using llvm.amdgcn.s.buffer.load:

- We can use a real pointer type, which LLVM can reason about better and
  perform alias analysis on.  This will also ease the transition to using
  fat pointers and LLVM IR loads.

- llvm.amdgcn.s.buffer.load is defined in IntrinsicsAMDGPU.td so passes can
  query information about it other than just its attributes.
---
 src/gallium/auxiliary/gallivm/lp_bld_intr.c        |  1 +
 src/gallium/auxiliary/gallivm/lp_bld_intr.h        |  3 +-
 src/gallium/drivers/radeonsi/si_shader.c           | 48 +++++++++++++++++-----
 src/gallium/drivers/radeonsi/si_shader_internal.h  |  8 ++++
 .../drivers/radeonsi/si_shader_tgsi_setup.c        |  6 +++
 5 files changed, 55 insertions(+), 11 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_intr.c 
b/src/gallium/auxiliary/gallivm/lp_bld_intr.c
index 049671a..dc8de55 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_intr.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_intr.c
@@ -144,6 +144,7 @@ static const char *attr_to_str(enum lp_func_attr attr)
 {
    switch (attr) {
    case LP_FUNC_ATTR_ALWAYSINLINE: return "alwaysinline";
+   case LP_FUNC_ATTR_ARGMEMONLY: return "argmemonly";
    case LP_FUNC_ATTR_BYVAL: return "byval";
    case LP_FUNC_ATTR_INREG: return "inreg";
    case LP_FUNC_ATTR_NOALIAS: return "noalias";
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_intr.h 
b/src/gallium/auxiliary/gallivm/lp_bld_intr.h
index f1e075a..7c8f09b 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_intr.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_intr.h
@@ -54,7 +54,8 @@ enum lp_func_attr {
    LP_FUNC_ATTR_NOUNWIND     = (1 << 4),
    LP_FUNC_ATTR_READNONE     = (1 << 5),
    LP_FUNC_ATTR_READONLY     = (1 << 6),
-   LP_FUNC_ATTR_LAST         = (1 << 7)
+   LP_FUNC_ATTR_ARGMEMONLY   = (1 << 7),
+   LP_FUNC_ATTR_LAST         = (1 << 8)
 };
 
 void
diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index a6de7c4..cf13cb5 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -93,11 +93,6 @@ static void si_build_ps_epilog_function(struct 
si_shader_context *ctx,
  */
 #define VS_EPILOG_PRIMID_LOC 2
 
-enum {
-       CONST_ADDR_SPACE = 2,
-       LOCAL_ADDR_SPACE = 3,
-};
-
 #define SENDMSG_GS 2
 #define SENDMSG_GS_DONE 3
 
@@ -360,8 +355,21 @@ static LLVMValueRef build_indexed_load_const(
        struct si_shader_context *ctx,
        LLVMValueRef base_ptr, LLVMValueRef index)
 {
+       LLVMTypeRef ptr_type = LLVMTypeOf(base_ptr);
+       LLVMTypeRef elem_type = LLVMGetElementType(ptr_type);
+       LLVMTypeKind elem_kind = LLVMGetTypeKind(elem_type);
        LLVMValueRef result = build_indexed_load(ctx, base_ptr, index, true);
        LLVMSetMetadata(result, ctx->invariant_load_md_kind, ctx->empty_md);
+
+       /* Set !dereferenceable metadata */
+       if (elem_kind == LLVMPointerTypeKind ||
+               (elem_kind == LLVMArrayTypeKind && 
LLVMGetTypeKind(LLVMGetElementType(elem_type)) == LLVMPointerTypeKind)) {
+               LLVMValueRef deref_bytes, deref_md;
+               deref_bytes = LLVMConstInt(ctx->i64, UINT64_MAX, 0);
+               deref_md = LLVMMDNodeInContext(LLVMGetTypeContext(ptr_type),
+                                               &deref_bytes, 1);
+               LLVMSetMetadata(result, ctx->dereferenceable_md_kind, deref_md);
+       }
        return result;
 }
 
@@ -1571,16 +1579,34 @@ static LLVMValueRef get_thread_id(struct 
si_shader_context *ctx)
 
 /**
  * Load a dword from a constant buffer.
+ * @param offset This is a byte offset.
+ * @returns An LLVMValueRef with f32 type.
  */
 static LLVMValueRef buffer_load_const(struct si_shader_context *ctx,
                                      LLVMValueRef resource,
                                      LLVMValueRef offset)
 {
        LLVMBuilderRef builder = ctx->gallivm.builder;
-       LLVMValueRef args[2] = {resource, offset};
+       LLVMValueRef load;
+       LLVMValueRef args[3] = {resource, offset, LLVMConstInt(ctx->i1, 0, 0) };
+       LLVMTypeRef resource_type = LLVMTypeOf(resource);
+       LLVMTypeKind resource_kind = LLVMGetTypeKind(resource_type);
+
+       /* XXX: We can have a non-pointer resource if we do a constant load
+         * from the RW_BUFFERS which are still represented using the <16 x i8>
+         * type. We can eliminate this once we start using pointer types for
+        * those buffers.
+        */
+       if (resource_kind != LLVMPointerTypeKind) {
+               return lp_build_intrinsic(builder, "llvm.SI.load.const",
+                                         ctx->f32, args, 2,
+                                         LP_FUNC_ATTR_READNONE);
+       }
 
-       return lp_build_intrinsic(builder, "llvm.SI.load.const", ctx->f32, 
args, 2,
-                              LP_FUNC_ATTR_READNONE);
+       load = lp_build_intrinsic(builder, "llvm.amdgcn.s.buffer.load.i32",
+                                 ctx->i32, args, 3,
+                                 LP_FUNC_ATTR_READONLY | 
LP_FUNC_ATTR_ARGMEMONLY);
+       return LLVMBuildBitCast(builder, load, ctx->f32, "");
 }
 
 static LLVMValueRef load_sample_position(struct si_shader_context *radeon_bld, 
LLVMValueRef sample_id)
@@ -5504,9 +5530,10 @@ static void create_meta_data(struct si_shader_context 
*ctx)
                                                               
"invariant.load", 14);
        ctx->range_md_kind = LLVMGetMDKindIDInContext(gallivm->context,
                                                     "range", 5);
+       ctx->dereferenceable_md_kind = LLVMGetMDKindIDInContext(
+               gallivm->context, "dereferenceable", 15);
        ctx->uniform_md_kind = LLVMGetMDKindIDInContext(gallivm->context,
                                                        "amdgpu.uniform", 14);
-
        ctx->empty_md = LLVMMDNodeInContext(gallivm->context, NULL, 0);
 }
 
@@ -5601,7 +5628,7 @@ static void create_function(struct si_shader_context *ctx)
        v3i32 = LLVMVectorType(ctx->i32, 3);
 
        params[SI_PARAM_RW_BUFFERS] = const_array(ctx->v16i8, 
SI_NUM_RW_BUFFERS);
-       params[SI_PARAM_CONST_BUFFERS] = const_array(ctx->v16i8, 
SI_NUM_CONST_BUFFERS);
+       params[SI_PARAM_CONST_BUFFERS] = 
const_array(ctx->const_buffer_rsrc_type, SI_NUM_CONST_BUFFERS);
        params[SI_PARAM_SAMPLERS] = const_array(ctx->v8i32, SI_NUM_SAMPLERS);
        params[SI_PARAM_IMAGES] = const_array(ctx->v8i32, SI_NUM_IMAGES);
        params[SI_PARAM_SHADER_BUFFERS] = const_array(ctx->v4i32, 
SI_NUM_SHADER_BUFFERS);
@@ -7722,6 +7749,7 @@ si_get_shader_part(struct si_screen *sscreen,
        struct gallivm_state *gallivm = &ctx.gallivm;
 
        si_init_shader_ctx(&ctx, sscreen, &shader, tm);
+       create_meta_data(&ctx);
        ctx.type = type;
 
        switch (type) {
diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h 
b/src/gallium/drivers/radeonsi/si_shader_internal.h
index 9055b4d..943b9a0 100644
--- a/src/gallium/drivers/radeonsi/si_shader_internal.h
+++ b/src/gallium/drivers/radeonsi/si_shader_internal.h
@@ -127,6 +127,7 @@ struct si_shader_context {
        unsigned range_md_kind;
        unsigned uniform_md_kind;
        unsigned fpmath_md_kind;
+       unsigned dereferenceable_md_kind;
        LLVMValueRef fpmath_md_2p5_ulp;
        LLVMValueRef empty_md;
 
@@ -150,10 +151,17 @@ struct si_shader_context {
        LLVMTypeRef v4i32;
        LLVMTypeRef v4f32;
        LLVMTypeRef v8i32;
+       LLVMTypeRef const_buffer_rsrc_type;
 
        LLVMValueRef shared_memory;
 };
 
+enum {
+       CONST_ADDR_SPACE = 2,
+       LOCAL_ADDR_SPACE = 3,
+       CONST_ADDR_SPACE_W_RSRC = 42,
+};
+
 static inline struct si_shader_context *
 si_shader_context(struct lp_build_tgsi_context *bld_base)
 {
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c 
b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
index 205686a..7a54e74 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
@@ -1364,6 +1364,12 @@ void si_llvm_context_init(struct si_shader_context *ctx,
        ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
        ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
        ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
+       ctx->const_buffer_rsrc_type = ctx->v16i8;
+
+       if (HAVE_LLVM >= 0x0500) {
+               ctx->const_buffer_rsrc_type =
+                       LLVMPointerType(ctx->i32, CONST_ADDR_SPACE_W_RSRC);
+       }
 }
 
 void si_llvm_create_func(struct si_shader_context *ctx,
-- 
2.7.4

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to