[Mesa-dev] radeonsi indirect sampler, llvm backend issue

Dave Airlie Sun, 12 Jul 2015 20:15:07 -0700

Hey,

So I tried to get ARB_gpu_shader5 indirect sampler support to work in radeonsi.


The attached patch to mesa adds support, but the llvm backend appears
to do the wrong thing, and I'm not sure how to fix it.

The image sampling functions need the resource and sampler address in SGPRs.

So I generate a v8i32 load for the resource and a v4i32 load for the
sampler; however, the backend translates the v4i32 load into a
VGPR-based load, and then it all chokes when it tries to pass that to
the image sample.

Commenting out the following pattern in the backend makes things work
as a workaround:
defm : MUBUFLoad_Pattern <BUFFER_LOAD_DWORDX4_ADDR64, v4i32, constant_load>;
I then get an s_load_dwordx4 instead of a buffer_load_dwordx4.

I've already exhausted my knowledge of LLVM on this, so if you have any ideas, let me know.

Below are the TGSI/LLVM inputs I create.

Dave.

FRAG
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SAMP[2]
DCL SAMP[3]
DCL SVIEW[0], 2D, FLOAT
DCL SVIEW[1], 2D, FLOAT
DCL SVIEW[2], 2D, FLOAT
DCL SVIEW[3], 2D, FLOAT
DCL CONST[4]
DCL TEMP[0], LOCAL
DCL ADDR[0..2]
IMM[0] FLT32 {    0.7500,     0.2500,     0.0000,     0.0000}
  0: MOV TEMP[0].xy, IMM[0].xyyy
  1: UARL ADDR[2].x, CONST[4].xxxx
  2: TEX TEMP[0], TEMP[0], SAMP[ADDR[2].x], 2D
  3: MOV OUT[0], TEMP[0]
  4: END
; ModuleID = 'tgsi'

define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x
i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x
<8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2
x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float,
float, float, float, float, float, i32, float, float) #0 {
main_body:
  %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)*
%1, i64 0, i64 0
  %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
  %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64)
  %25 = bitcast float %24 to i32
  %26 = sext i32 %25 to i64
  %27 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)*
%3, i64 0, i64 %26
  %28 = load <8 x i32>, <8 x i32> addrspace(2)* %27, align 32, !tbaa !0
  %29 = sext i32 %25 to i64
  %30 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)*
%2, i64 0, i64 %29
  %31 = load <4 x i32>, <4 x i32> addrspace(2)* %30, align 16, !tbaa !0
  %32 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> <i32
1061158912, i32 1048576000>, <8 x i32> %28, <4 x i32> %31, i32 15, i32
0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
  %33 = extractelement <4 x float> %32, i32 0
  %34 = extractelement <4 x float> %32, i32 1
  %35 = extractelement <4 x float> %32, i32 2
  %36 = extractelement <4 x float> %32, i32 3
  %37 = call i32 @llvm.SI.packf16(float %33, float %34)
  %38 = bitcast i32 %37 to float
  %39 = call i32 @llvm.SI.packf16(float %35, float %36)
  %40 = bitcast i32 %39 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float
%38, float %40, float %38, float %40)
  ret void
}

From 23ee04b806875724ddeb6d9731f9bd42383272e8 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airl...@redhat.com>
Date: Mon, 13 Jul 2015 00:07:09 +0100
Subject: [PATCH] radeonsi: add support for indirect samplers

This is the necessary frontend work; the llvm backend
is producing the wrong code for the v4i32 load.
---
 src/gallium/drivers/radeonsi/si_shader.c | 70 ++++++++++++++++++++++++++++----
 1 file changed, 62 insertions(+), 8 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 75a29ae..38ad74e 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -1603,6 +1603,24 @@ static bool tgsi_is_shadow_sampler(unsigned target)
 
 static const struct lp_build_tgsi_action tex_action;
 
+/**
+ * Return the value of tgsi_ind_register for indexing.
+ * This is the indirect index with the constant offset added to it.
+ */
+static LLVMValueRef get_indirect_index(struct si_shader_context *si_shader_ctx,
+				       const struct tgsi_ind_register *ind,
+				       int rel_index)
+{
+	struct gallivm_state *gallivm = si_shader_ctx->radeon_bld.soa.bld_base.base.gallivm;
+	LLVMValueRef result;
+
+	result = si_shader_ctx->radeon_bld.soa.addr[ind->Index][ind->Swizzle];
+	result = LLVMBuildLoad(gallivm->builder, result, "");
+	result = LLVMBuildAdd(gallivm->builder, result,
+			      lp_build_const_int32(gallivm, rel_index), "");
+	return result;
+}
+
 static void tex_fetch_args(
 	struct lp_build_tgsi_context * bld_base,
 	struct lp_build_emit_data * emit_data)
@@ -1618,10 +1636,41 @@ static void tex_fetch_args(
 	unsigned num_coords = tgsi_util_get_texture_coord_dim(target, &ref_pos);
 	unsigned count = 0;
 	unsigned chan;
-	unsigned sampler_src = emit_data->inst->Instruction.NumSrcRegs - 1;
-	unsigned sampler_index = emit_data->inst->Src[sampler_src].Register.Index;
+	unsigned sampler_src;
+	unsigned sampler_index;
 	bool has_offset = HAVE_LLVM >= 0x0305 ? inst->Texture.NumOffsets > 0 : false;
+	bool has_sampler_indirect = false;
+	LLVMValueRef res_ptr, samp_ptr;
 
+	sampler_src = emit_data->inst->Instruction.NumSrcRegs - 1;
+	sampler_index = emit_data->inst->Src[sampler_src].Register.Index;
+
+	if (emit_data->inst->Src[sampler_src].Register.Indirect) {
+		const struct tgsi_full_src_register *reg = &emit_data->inst->Src[sampler_src];
+		LLVMValueRef temp_ptr;
+		LLVMValueRef ind_index;
+				
+		int first;
+		res_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_RESOURCE);
+		samp_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_SAMPLER);
+
+
+		first = reg->Register.Index;
+
+		ind_index = get_indirect_index(si_shader_ctx, &reg->Indirect,
+					   reg->Register.Index - first);
+
+		res_ptr = build_indexed_load_const(si_shader_ctx, res_ptr,
+					     ind_index);
+
+		samp_ptr = build_indexed_load_const(si_shader_ctx, samp_ptr,
+					      ind_index);
+		has_sampler_indirect = true;
+
+	} else {
+		res_ptr = si_shader_ctx->resources[sampler_index];
+		samp_ptr = si_shader_ctx->samplers[sampler_index];
+	}
 	if (target == TGSI_TEXTURE_BUFFER) {
 		LLVMTypeRef i128 = LLVMIntTypeInContext(gallivm->context, 128);
 		LLVMTypeRef v2i128 = LLVMVectorType(i128, 2);
@@ -1629,7 +1678,7 @@ static void tex_fetch_args(
 		LLVMTypeRef v16i8 = LLVMVectorType(i8, 16);
 
 		/* Bitcast and truncate v8i32 to v16i8. */
-		LLVMValueRef res = si_shader_ctx->resources[sampler_index];
+		LLVMValueRef res = res_ptr;
 		res = LLVMBuildBitCast(gallivm->builder, res, v2i128, "");
 		res = LLVMBuildExtractElement(gallivm->builder, res, bld_base->uint_bld.one, "");
 		res = LLVMBuildBitCast(gallivm->builder, res, v16i8, "");
@@ -1855,7 +1904,7 @@ static void tex_fetch_args(
 	}
 
 	/* Resource */
-	emit_data->args[1] = si_shader_ctx->resources[sampler_index];
+	emit_data->args[1] = res_ptr;
 
 	if (opcode == TGSI_OPCODE_TXF) {
 		/* add tex offsets */
@@ -1901,7 +1950,7 @@ static void tex_fetch_args(
 			4);
 	} else if (opcode == TGSI_OPCODE_TG4 ||
 		   opcode == TGSI_OPCODE_LODQ ||
-		   has_offset) {
+		   has_offset || has_sampler_indirect) {
 		unsigned is_array = target == TGSI_TEXTURE_1D_ARRAY ||
 				    target == TGSI_TEXTURE_SHADOW1D_ARRAY ||
 				    target == TGSI_TEXTURE_2D_ARRAY ||
@@ -1938,7 +1987,7 @@ static void tex_fetch_args(
 			dmask = 1 << gather_comp;
 		}
 
-		emit_data->args[2] = si_shader_ctx->samplers[sampler_index];
+		emit_data->args[2] = samp_ptr;
 		emit_data->args[3] = lp_build_const_int32(gallivm, dmask);
 		emit_data->args[4] = lp_build_const_int32(gallivm, is_rect); /* unorm */
 		emit_data->args[5] = lp_build_const_int32(gallivm, 0); /* r128 */
@@ -1954,7 +2003,7 @@ static void tex_fetch_args(
 			LLVMFloatTypeInContext(gallivm->context),
 			4);
 	} else {
-		emit_data->args[2] = si_shader_ctx->samplers[sampler_index];
+		emit_data->args[2] = samp_ptr;
 		emit_data->args[3] = lp_build_const_int32(gallivm, target);
 		emit_data->arg_count = 4;
 
@@ -1987,7 +2036,12 @@ static void build_tex_intrinsic(const struct lp_build_tgsi_action * action,
 	char intr_name[127];
 	bool has_offset = HAVE_LLVM >= 0x0305 ?
 				emit_data->inst->Texture.NumOffsets > 0 : false;
+	bool has_sampler_indirect = false;
+	unsigned sampler_src = emit_data->inst->Instruction.NumSrcRegs - 1;
 
+	if (emit_data->inst->Src[sampler_src].Register.Indirect) {
+		has_sampler_indirect = true;
+	}
 	if (target == TGSI_TEXTURE_BUFFER) {
 		emit_data->output[emit_data->chan] = build_intrinsic(
 			base->gallivm->builder,
@@ -1999,7 +2053,7 @@ static void build_tex_intrinsic(const struct lp_build_tgsi_action * action,
 
 	if (opcode == TGSI_OPCODE_TG4 ||
 	    opcode == TGSI_OPCODE_LODQ ||
-	    (opcode != TGSI_OPCODE_TXF && has_offset)) {
+	    (opcode != TGSI_OPCODE_TXF && has_offset) || has_sampler_indirect) {
 		bool is_shadow = tgsi_is_shadow_sampler(target);
 		const char *name = "llvm.SI.image.sample";
 		const char *infix = "";
-- 
2.4.3

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] radeonsi indirect sampler, llvm backend issue

Reply via email to