Advantages of using llvm.amdgcn.s.buffer.load: - We can use a real pointer type, which LLVM can better reason about and perform alias analysis on. This will also ease the transition to using fat pointers and LLVM IR loads.
- llvm.amdgcn.s.buffer.load is defined in IntrinsicsAMDGPU.td so passes can query information about it other than just its attributes. --- src/gallium/auxiliary/gallivm/lp_bld_intr.c | 1 + src/gallium/auxiliary/gallivm/lp_bld_intr.h | 3 +- src/gallium/drivers/radeonsi/si_shader.c | 48 +++++++++++++++++----- src/gallium/drivers/radeonsi/si_shader_internal.h | 8 ++++ .../drivers/radeonsi/si_shader_tgsi_setup.c | 6 +++ 5 files changed, 55 insertions(+), 11 deletions(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_intr.c b/src/gallium/auxiliary/gallivm/lp_bld_intr.c index 049671a..dc8de55 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_intr.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_intr.c @@ -144,6 +144,7 @@ static const char *attr_to_str(enum lp_func_attr attr) { switch (attr) { case LP_FUNC_ATTR_ALWAYSINLINE: return "alwaysinline"; + case LP_FUNC_ATTR_ARGMEMONLY: return "argmemonly"; case LP_FUNC_ATTR_BYVAL: return "byval"; case LP_FUNC_ATTR_INREG: return "inreg"; case LP_FUNC_ATTR_NOALIAS: return "noalias"; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_intr.h b/src/gallium/auxiliary/gallivm/lp_bld_intr.h index f1e075a..7c8f09b 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_intr.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_intr.h @@ -54,7 +54,8 @@ enum lp_func_attr { LP_FUNC_ATTR_NOUNWIND = (1 << 4), LP_FUNC_ATTR_READNONE = (1 << 5), LP_FUNC_ATTR_READONLY = (1 << 6), - LP_FUNC_ATTR_LAST = (1 << 7) + LP_FUNC_ATTR_ARGMEMONLY = (1 << 7), + LP_FUNC_ATTR_LAST = (1 << 8) }; void diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index a6de7c4..cf13cb5 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -93,11 +93,6 @@ static void si_build_ps_epilog_function(struct si_shader_context *ctx, */ #define VS_EPILOG_PRIMID_LOC 2 -enum { - CONST_ADDR_SPACE = 2, - LOCAL_ADDR_SPACE = 3, -}; - #define SENDMSG_GS 2 #define SENDMSG_GS_DONE 3 @@ -360,8 +355,21 @@ static 
LLVMValueRef build_indexed_load_const( struct si_shader_context *ctx, LLVMValueRef base_ptr, LLVMValueRef index) { + LLVMTypeRef ptr_type = LLVMTypeOf(base_ptr); + LLVMTypeRef elem_type = LLVMGetElementType(ptr_type); + LLVMTypeKind elem_kind = LLVMGetTypeKind(elem_type); LLVMValueRef result = build_indexed_load(ctx, base_ptr, index, true); LLVMSetMetadata(result, ctx->invariant_load_md_kind, ctx->empty_md); + + /* Set !dereferenceable metadata */ + if (elem_kind == LLVMPointerTypeKind || + (elem_kind == LLVMArrayTypeKind && LLVMGetTypeKind(LLVMGetElementType(elem_type)) == LLVMPointerTypeKind)) { + LLVMValueRef deref_bytes, deref_md; + deref_bytes = LLVMConstInt(ctx->i64, UINT64_MAX, 0); + deref_md = LLVMMDNodeInContext(LLVMGetTypeContext(ptr_type), + &deref_bytes, 1); + LLVMSetMetadata(result, ctx->dereferenceable_md_kind, deref_md); + } return result; } @@ -1571,16 +1579,34 @@ static LLVMValueRef get_thread_id(struct si_shader_context *ctx) /** * Load a dword from a constant buffer. + * @param offset This is a byte offset. + * @returns An LLVMValueRef with f32 type. */ static LLVMValueRef buffer_load_const(struct si_shader_context *ctx, LLVMValueRef resource, LLVMValueRef offset) { LLVMBuilderRef builder = ctx->gallivm.builder; - LLVMValueRef args[2] = {resource, offset}; + LLVMValueRef load; + LLVMValueRef args[3] = {resource, offset, LLVMConstInt(ctx->i1, 0, 0) }; + LLVMTypeRef resource_type = LLVMTypeOf(resource); + LLVMTypeKind resource_kind = LLVMGetTypeKind(resource_type); + + /* XXX: We can have a non-pointer resource if we do a constant load + * from the RW_BUFFERS whicha are still represented using the <16 x i8> + * type. We can eliminate this once we start using pointer types for + * those buffers. 
+ */ + if (resource_kind != LLVMPointerTypeKind) { + return lp_build_intrinsic(builder, "llvm.SI.load.const", + ctx->f32, args, 2, + LP_FUNC_ATTR_READNONE); + } - return lp_build_intrinsic(builder, "llvm.SI.load.const", ctx->f32, args, 2, - LP_FUNC_ATTR_READNONE); + load = lp_build_intrinsic(builder, "llvm.amdgcn.s.buffer.load.i32", + ctx->i32, args, 3, + LP_FUNC_ATTR_READONLY | LP_FUNC_ATTR_ARGMEMONLY); + return LLVMBuildBitCast(builder, load, ctx->f32, ""); } static LLVMValueRef load_sample_position(struct si_shader_context *radeon_bld, LLVMValueRef sample_id) @@ -5504,9 +5530,10 @@ static void create_meta_data(struct si_shader_context *ctx) "invariant.load", 14); ctx->range_md_kind = LLVMGetMDKindIDInContext(gallivm->context, "range", 5); + ctx->dereferenceable_md_kind = LLVMGetMDKindIDInContext( + gallivm->context, "dereferenceable", 15); ctx->uniform_md_kind = LLVMGetMDKindIDInContext(gallivm->context, "amdgpu.uniform", 14); - ctx->empty_md = LLVMMDNodeInContext(gallivm->context, NULL, 0); } @@ -5601,7 +5628,7 @@ static void create_function(struct si_shader_context *ctx) v3i32 = LLVMVectorType(ctx->i32, 3); params[SI_PARAM_RW_BUFFERS] = const_array(ctx->v16i8, SI_NUM_RW_BUFFERS); - params[SI_PARAM_CONST_BUFFERS] = const_array(ctx->v16i8, SI_NUM_CONST_BUFFERS); + params[SI_PARAM_CONST_BUFFERS] = const_array(ctx->const_buffer_rsrc_type, SI_NUM_CONST_BUFFERS); params[SI_PARAM_SAMPLERS] = const_array(ctx->v8i32, SI_NUM_SAMPLERS); params[SI_PARAM_IMAGES] = const_array(ctx->v8i32, SI_NUM_IMAGES); params[SI_PARAM_SHADER_BUFFERS] = const_array(ctx->v4i32, SI_NUM_SHADER_BUFFERS); @@ -7722,6 +7749,7 @@ si_get_shader_part(struct si_screen *sscreen, struct gallivm_state *gallivm = &ctx.gallivm; si_init_shader_ctx(&ctx, sscreen, &shader, tm); + create_meta_data(&ctx); ctx.type = type; switch (type) { diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h index 9055b4d..943b9a0 100644 --- 
a/src/gallium/drivers/radeonsi/si_shader_internal.h +++ b/src/gallium/drivers/radeonsi/si_shader_internal.h @@ -127,6 +127,7 @@ struct si_shader_context { unsigned range_md_kind; unsigned uniform_md_kind; unsigned fpmath_md_kind; + unsigned dereferenceable_md_kind; LLVMValueRef fpmath_md_2p5_ulp; LLVMValueRef empty_md; @@ -150,10 +151,17 @@ struct si_shader_context { LLVMTypeRef v4i32; LLVMTypeRef v4f32; LLVMTypeRef v8i32; + LLVMTypeRef const_buffer_rsrc_type; LLVMValueRef shared_memory; }; +enum { + CONST_ADDR_SPACE = 2, + LOCAL_ADDR_SPACE = 3, + CONST_ADDR_SPACE_W_RSRC = 42, +}; + static inline struct si_shader_context * si_shader_context(struct lp_build_tgsi_context *bld_base) { diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c index 205686a..7a54e74 100644 --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c @@ -1364,6 +1364,12 @@ void si_llvm_context_init(struct si_shader_context *ctx, ctx->v4i32 = LLVMVectorType(ctx->i32, 4); ctx->v4f32 = LLVMVectorType(ctx->f32, 4); ctx->v8i32 = LLVMVectorType(ctx->i32, 8); + ctx->const_buffer_rsrc_type = ctx->v16i8; + + if (HAVE_LLVM >= 0x0500) { + ctx->const_buffer_rsrc_type = + LLVMPointerType(ctx->i32, CONST_ADDR_SPACE_W_RSRC); + } } void si_llvm_create_func(struct si_shader_context *ctx, -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev