On Wed, Dec 19, 2012 at 02:07:22PM -0500, Jerome Glisse wrote: > On Wed, Dec 19, 2012 at 12:33 PM, Tom Stellard <t...@stellard.net> wrote: > > On Sun, Dec 16, 2012 at 08:33:23PM +1000, Dave Airlie wrote: > >> From: Dave Airlie <airl...@redhat.com> > >> > >> This adds TBO support to r600g, and with GLSL 1.40 enabled, > >> we now get 3.1 core profiles advertised for r600g. > >> > >> This code is evergreen only so far, but I don't think there is > >> much to make it work on r600/700/cayman other than testing. > >> > >> a) buffer txq is broken like cube map txq, this sucks, fix it the > >> exact same way. > >> > >> b) buffer fetches are done with a vertex clause, > >> > >> c) vertex swizzling offsets are different than texture swizzles, > >> but we still need to use the combiner, so make it configurable. > >> > >> d) add implementation of UCMP. > >> > >> TODO: r600/700/cayman testin > >> Signed-off-by: Dave Airlie <airl...@redhat.com> > >> --- > >> src/gallium/drivers/r600/evergreen_state.c | 55 ++++++++++++++++++++ > >> src/gallium/drivers/r600/r600_asm.c | 2 +- > >> src/gallium/drivers/r600/r600_asm.h | 2 + > >> src/gallium/drivers/r600/r600_pipe.c | 4 +- > >> src/gallium/drivers/r600/r600_pipe.h | 10 +++- > >> src/gallium/drivers/r600/r600_shader.c | 75 > >> ++++++++++++++++++++++++++++ > >> src/gallium/drivers/r600/r600_shader.h | 1 + > >> src/gallium/drivers/r600/r600_state_common.c | 58 +++++++++++++++++---- > >> src/gallium/drivers/r600/r600_texture.c | 16 ++++-- > >> 9 files changed, 204 insertions(+), 19 deletions(-) > >> > > > > [snip] > > > >> diff --git a/src/gallium/drivers/r600/r600_shader.c > >> b/src/gallium/drivers/r600/r600_shader.c > >> index feb7001..60667e7 100644 > >> --- a/src/gallium/drivers/r600/r600_shader.c > >> +++ b/src/gallium/drivers/r600/r600_shader.c > >> @@ -3819,6 +3819,71 @@ static inline unsigned tgsi_tex_get_src_gpr(struct > >> r600_shader_ctx *ctx, > >> return ctx->file_offset[inst->Src[index].Register.File] + > >> 
inst->Src[index].Register.Index; > >> } > >> > >> +static int do_vtx_fetch_inst(struct r600_shader_ctx *ctx, boolean > >> src_requires_loading) > >> +{ > >> + struct r600_bytecode_vtx vtx; > >> + struct r600_bytecode_alu alu; > >> + struct tgsi_full_instruction *inst = > >> &ctx->parse.FullToken.FullInstruction; > >> + int src_gpr, r, i; > >> + > >> + src_gpr = tgsi_tex_get_src_gpr(ctx, 0); > >> + if (src_requires_loading) { > >> + for (i = 0; i < 4; i++) { > >> + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); > >> + alu.inst = > >> CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); > >> + r600_bytecode_src(&alu.src[0], &ctx->src[0], i); > >> + alu.dst.sel = ctx->temp_reg; > >> + alu.dst.chan = i; > >> + if (i == 3) > >> + alu.last = 1; > >> + alu.dst.write = 1; > >> + r = r600_bytecode_add_alu(ctx->bc, &alu); > >> + if (r) > >> + return r; > >> + } > >> + src_gpr = ctx->temp_reg; > >> + } > >> + > >> + memset(&vtx, 0, sizeof(vtx)); > >> + vtx.inst = 0; > >> + vtx.buffer_id = tgsi_tex_get_src_gpr(ctx, 1) + > >> R600_MAX_CONST_BUFFERS;; > >> + vtx.fetch_type = 2; /* VTX_FETCH_NO_INDEX_OFFSET */ > >> + vtx.src_gpr = src_gpr; > >> + vtx.mega_fetch_count = 16; > >> + vtx.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + > >> inst->Dst[0].Register.Index; > >> + vtx.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; > >> /* SEL_X */ > >> + vtx.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7; > >> /* SEL_Y */ > >> + vtx.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7; > >> /* SEL_Z */ > >> + vtx.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7; > >> /* SEL_W */ > >> + vtx.use_const_fields = 1; > >> + vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */ > >> + > > > > According to the docs, srf_mode_all will be ignored if use_const_fields > > is set. However, based on my tests while running compute shaders, other > > fields like data_format, which are supposed to be ignored weren't being > > ignored unless they were set to zero. 
So, I think it would be safer > > here to set srf_mode_all to zero and make sure that bit gets set on > > the resource. > > > > > >> + if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx))) > >> + return r; > >> + return 0; > >> +} > >> + > > > > Otherwise, this code for vtx fetch looks good to me. One problem I ran into > > with vtx fetch instructions while working on compute shaders was that > > the GPU will hang if you write to vtx.src_gpr in the > > instruction group following the vtx fetch. Here is a simple example: > > > > %T2_X<def> = MOV %ZERO > > %T3_X<def> = VTX_READ_eg %T2_X<kill>, 24 > > %T2_X<def> = MOV %ZERO > > > > I'm not sure if this happens on all GPU variants, but I was able to > > consistently reproduce this on my SUMO. You may want to keep an eye > > out for this in case you run into any unexplainable hangs. > > > > Did the vtx fetch group have the barrier flag set? >
Yes, I've attached the full dump if you are interested. -Tom
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-v2048:2048:2048-n8:16:32:64" target triple = "r600--" define void @loop_hang(i32 addrspace(1)* nocapture %out, i32 %iterations) nounwind { entry: %0 = call i32 @llvm.r600.read.local.size.x() nounwind %1 = call i32 @llvm.r600.read.tgid.x() nounwind %2 = call i32 @llvm.r600.read.tidig.x() nounwind %mul.i = mul i32 %0, %1 %add.i = add i32 %mul.i, %2 %cmp = icmp eq i32 %iterations, 0 br i1 %cmp, label %if.else, label %if.then if.then: ; preds = %entry %arrayidx = getelementptr inbounds i32 addrspace(1)* %out, i32 %add.i store i32 %iterations, i32 addrspace(1)* %arrayidx, align 4 br label %if.end if.else: ; preds = %entry %add = add i32 %add.i, 1 %arrayidx1 = getelementptr inbounds i32 addrspace(1)* %out, i32 %add store i32 0, i32 addrspace(1)* %arrayidx1, align 4 br label %if.end if.end: ; preds = %if.else, %if.then ret void } declare i32 @llvm.r600.read.tgid.x() nounwind readnone declare i32 @llvm.r600.read.local.size.x() nounwind readnone declare i32 @llvm.r600.read.tidig.x() nounwind readnone !opencl.kernels = !{!0} !0 = metadata !{void (i32 addrspace(1)*, i32)* @loop_hang} # Machine code for function loop_hang: Post SSA, not tracking liveness Function Live Ins: %T0_X in %vreg3, %T1_X in %vreg4 BB#0: derived from LLVM BB %entry Live Ins: %T0_X %T1_X %T2_X<def> = MOV %ZERO %T3_X<def> = VTX_READ_PARAM_i32_eg %T2_X<kill>, 24 %T2_X<def> = MOV %ZERO %T0_Y<def> = MULLO_INT_eg %T3_X<kill>, %T1_X<kill> %T0_Y<def> = ADD_INT %T0_Y<kill>, %T0_X<kill> %T0_X<def> = VTX_READ_PARAM_i32_eg %T2_X, 36 %T1_X<def> = VTX_READ_PARAM_i32_eg %T2_X<kill>, 40 %T0_Z<def> = MOV_IMM_I32 %ALU_LITERAL_X, 0 %T0_Z<def> = SETE_INT %T1_X, %T0_Z<kill> IF_LOGICALNZ_i32 %T0_Z %T0_Z<def> = MOV_IMM_I32 %ALU_LITERAL_X, 2 %T0_Y<def> = LSHL_eg %T0_Y<kill>, %T0_Z<kill> 
%T0_X<def> = ADD_INT %T0_Y<kill>, %T0_X<kill> %T0_Y<def> = MOV_IMM_I32 %ALU_LITERAL_X, 4 %T0_X<def> = ADD_INT %T0_X<kill>, %T0_Y<kill> %T1_X<def> = MOV_IMM_I32 %ALU_LITERAL_X, 0 %T0_Y<def> = MOV %ALU_LITERAL_X, 2 %T0_X<def> = LSHR_eg %T0_X<kill>, %T0_Y<kill> RAT_WRITE_CACHELESS_eg %T1_X<kill>, %T0_X<kill> ELSE %T0_Z<def> = MOV_IMM_I32 %ALU_LITERAL_X, 2 %T0_Y<def> = LSHL_eg %T0_Y<kill>, %T0_Z<kill> %T0_X<def> = ADD_INT %T0_X<kill>, %T0_Y<kill> %T0_Y<def> = MOV %ALU_LITERAL_X, 2 %T0_X<def> = LSHR_eg %T0_X<kill>, %T0_Y<kill> RAT_WRITE_CACHELESS_eg %T1_X<kill>, %T0_X<kill> ENDIF RETURN # End machine code for function loop_hang. bytecode 90 dw -- 4 gprs --------------------- E 0000 0000000D ALU ADDR:26 KCACHE_MODE0:0 KCACHE_BANK0:0 KCACHE_BANK1:0 0001 A0000000 ALU INST:0x8 KCACHE_MODE1:0 KCACHE_ADDR0:0 KCACHE_ADDR1:0 COUNT:1 0026 800000F8 SRC0(SEL:248 REL:0 CHAN:0 NEG:0) SRC1(SEL:0 REL:0 CHAN:0 NEG:0 IM:0) LAST:1) 0027 00400C90 * INST:0x19 DST(SEL:2 CHAN:0 REL:0 CLAMP:0) BANK_SWIZZLE:0 SRC0_ABS:0 SRC1_ABS:0 WRITE_MASK:1 OMOD:0 EXECUTE_MASK:0 UPDATE_PRED:0 0002 0000000E TEX/VTX ADDR:28 0003 80400000 TEX/VTX INST:0x1 COUNT:1 0028 10020040 INST:0 FETCH_TYPE:2 BUFFER_ID:0 0029 135FF003 SRC(GPR:2 SEL_X:0) MEGA_FETCH_COUNT:4 DST(GPR:3 SEL_X:0 SEL_Y:7 SEL_Z:7 SEL_W:7) USE_CONST_FIELDS:0 FORMAT(DATA:13 NUM:1 COMP:0 MODE:0) 0030 00080018 ENDIAN:0 OFFSET:24 0031 00000000 0004 00000010 ALU ADDR:32 KCACHE_MODE0:0 KCACHE_BANK0:0 KCACHE_BANK1:0 0005 A0080000 ALU INST:0x8 KCACHE_MODE1:0 KCACHE_ADDR0:0 KCACHE_ADDR1:0 COUNT:3 0032 000000F8 SRC0(SEL:248 REL:0 CHAN:0 NEG:0) SRC1(SEL:0 REL:0 CHAN:0 NEG:0 IM:0) LAST:0) 0033 00400C90 INST:0x19 DST(SEL:2 CHAN:0 REL:0 CLAMP:0) BANK_SWIZZLE:0 SRC0_ABS:0 SRC1_ABS:0 WRITE_MASK:1 OMOD:0 EXECUTE_MASK:0 UPDATE_PRED:0 0034 80002003 SRC0(SEL:3 REL:0 CHAN:0 NEG:0) SRC1(SEL:1 REL:0 CHAN:0 NEG:0 IM:0) LAST:1) 0035 20004790 * INST:0x8f DST(SEL:0 CHAN:1 REL:0 CLAMP:0) BANK_SWIZZLE:0 SRC0_ABS:0 SRC1_ABS:0 WRITE_MASK:1 OMOD:0 EXECUTE_MASK:0 UPDATE_PRED:0 
0036 800000FF SRC0(SEL:255 REL:0 CHAN:0 NEG:0) SRC1(SEL:0 REL:0 CHAN:0 NEG:0 IM:0) LAST:1) 0037 20001A10 * INST:0x34 DST(SEL:0 CHAN:1 REL:0 CLAMP:0) BANK_SWIZZLE:0 SRC0_ABS:0 SRC1_ABS:0 WRITE_MASK:1 OMOD:0 EXECUTE_MASK:0 UPDATE_PRED:0 0006 00000014 TEX/VTX ADDR:40 0007 80400000 TEX/VTX INST:0x1 COUNT:1 0040 10020040 INST:0 FETCH_TYPE:2 BUFFER_ID:0 0041 135FF000 SRC(GPR:2 SEL_X:0) MEGA_FETCH_COUNT:4 DST(GPR:0 SEL_X:0 SEL_Y:7 SEL_Z:7 SEL_W:7) USE_CONST_FIELDS:0 FORMAT(DATA:13 NUM:1 COMP:0 MODE:0) 0042 00080024 ENDIAN:0 OFFSET:36 0043 00000000 0008 00000016 TEX/VTX ADDR:44 0009 80400000 TEX/VTX INST:0x1 COUNT:1 0044 10020040 INST:0 FETCH_TYPE:2 BUFFER_ID:0 0045 135FF001 SRC(GPR:2 SEL_X:0) MEGA_FETCH_COUNT:4 DST(GPR:1 SEL_X:0 SEL_Y:7 SEL_Z:7 SEL_W:7) USE_CONST_FIELDS:0 FORMAT(DATA:13 NUM:1 COMP:0 MODE:0) 0046 00080028 ENDIAN:0 OFFSET:40 0047 00000000 0010 00000018 ALU ADDR:48 KCACHE_MODE0:0 KCACHE_BANK0:0 KCACHE_BANK1:0 0011 A4080000 ALU INST:0x9 KCACHE_MODE1:0 KCACHE_ADDR0:0 KCACHE_ADDR1:0 COUNT:3 0048 800000F8 SRC0(SEL:248 REL:0 CHAN:0 NEG:0) SRC1(SEL:0 REL:0 CHAN:0 NEG:0 IM:0) LAST:1) 0049 40000C90 * INST:0x19 DST(SEL:0 CHAN:2 REL:0 CLAMP:0) BANK_SWIZZLE:0 SRC0_ABS:0 SRC1_ABS:0 WRITE_MASK:1 OMOD:0 EXECUTE_MASK:0 UPDATE_PRED:0 0050 811FC001 SRC0(SEL:1 REL:0 CHAN:0 NEG:0) SRC1(SEL:254 REL:0 CHAN:2 NEG:0 IM:0) LAST:1) 0051 40001D10 * INST:0x3a DST(SEL:0 CHAN:2 REL:0 CLAMP:0) BANK_SWIZZLE:0 SRC0_ABS:0 SRC1_ABS:0 WRITE_MASK:1 OMOD:0 EXECUTE_MASK:0 UPDATE_PRED:0 0052 801F08FE SRC0(SEL:254 REL:0 CHAN:2 NEG:0) SRC1(SEL:248 REL:0 CHAN:0 NEG:0 IM:0) LAST:1) 0053 0000228C * INST:0x45 DST(SEL:0 CHAN:0 REL:0 CLAMP:0) BANK_SWIZZLE:0 SRC0_ABS:0 SRC1_ABS:0 WRITE_MASK:0 OMOD:0 EXECUTE_MASK:1 UPDATE_PRED:1 0012 00000009 CF ADDR:18 0013 82800000 CF INST:0xa COND:0 POP_COUNT:0 0014 0000001B ALU ADDR:54 KCACHE_MODE0:0 KCACHE_BANK0:0 KCACHE_BANK1:0 0015 A0280000 ALU INST:0x8 KCACHE_MODE1:0 KCACHE_ADDR0:0 KCACHE_ADDR1:0 COUNT:11 0054 800000FD SRC0(SEL:253 REL:0 CHAN:0 NEG:0) SRC1(SEL:0 
REL:0 CHAN:0 NEG:0 IM:0) LAST:1) 0055 40000C90 * INST:0x19 DST(SEL:0 CHAN:2 REL:0 CLAMP:0) BANK_SWIZZLE:0 SRC0_ABS:0 SRC1_ABS:0 WRITE_MASK:1 OMOD:0 EXECUTE_MASK:0 UPDATE_PRED:0 0056 00000002 0.000000 (2) 0058 811FC400 SRC0(SEL:0 REL:0 CHAN:1 NEG:0) SRC1(SEL:254 REL:0 CHAN:2 NEG:0 IM:0) LAST:1) 0059 20000B90 * INST:0x17 DST(SEL:0 CHAN:1 REL:0 CLAMP:0) BANK_SWIZZLE:0 SRC0_ABS:0 SRC1_ABS:0 WRITE_MASK:1 OMOD:0 EXECUTE_MASK:0 UPDATE_PRED:0 0060 000004FE SRC0(SEL:254 REL:0 CHAN:1 NEG:0) SRC1(SEL:0 REL:0 CHAN:0 NEG:0 IM:0) LAST:0) 0061 00001A10 INST:0x34 DST(SEL:0 CHAN:0 REL:0 CLAMP:0) BANK_SWIZZLE:0 SRC0_ABS:0 SRC1_ABS:0 WRITE_MASK:1 OMOD:0 EXECUTE_MASK:0 UPDATE_PRED:0 0062 800000FD SRC0(SEL:253 REL:0 CHAN:0 NEG:0) SRC1(SEL:0 REL:0 CHAN:0 NEG:0 IM:0) LAST:1) 0063 20000C90 * INST:0x19 DST(SEL:0 CHAN:1 REL:0 CLAMP:0) BANK_SWIZZLE:0 SRC0_ABS:0 SRC1_ABS:0 WRITE_MASK:1 OMOD:0 EXECUTE_MASK:0 UPDATE_PRED:0 0064 00000004 0.000000 (4) 0066 009FC0FE SRC0(SEL:254 REL:0 CHAN:0 NEG:0) SRC1(SEL:254 REL:0 CHAN:1 NEG:0 IM:0) LAST:0) 0067 00001A10 INST:0x34 DST(SEL:0 CHAN:0 REL:0 CLAMP:0) BANK_SWIZZLE:0 SRC0_ABS:0 SRC1_ABS:0 WRITE_MASK:1 OMOD:0 EXECUTE_MASK:0 UPDATE_PRED:0 0068 000000FD SRC0(SEL:253 REL:0 CHAN:0 NEG:0) SRC1(SEL:0 REL:0 CHAN:0 NEG:0 IM:0) LAST:0) 0069 20000C90 INST:0x19 DST(SEL:0 CHAN:1 REL:0 CLAMP:0) BANK_SWIZZLE:0 SRC0_ABS:0 SRC1_ABS:0 WRITE_MASK:1 OMOD:0 EXECUTE_MASK:0 UPDATE_PRED:0 0070 800000F8 SRC0(SEL:248 REL:0 CHAN:0 NEG:0) SRC1(SEL:0 REL:0 CHAN:0 NEG:0 IM:0) LAST:1) 0071 00200C90 * INST:0x19 DST(SEL:1 CHAN:0 REL:0 CLAMP:0) BANK_SWIZZLE:0 SRC0_ABS:0 SRC1_ABS:0 WRITE_MASK:1 OMOD:0 EXECUTE_MASK:0 UPDATE_PRED:0 0072 00000002 0.000000 (2) 0074 809FC0FE SRC0(SEL:254 REL:0 CHAN:0 NEG:0) SRC1(SEL:254 REL:0 CHAN:1 NEG:0 IM:0) LAST:1) 0075 00000B10 * INST:0x16 DST(SEL:0 CHAN:0 REL:0 CLAMP:0) BANK_SWIZZLE:0 SRC0_ABS:0 SRC1_ABS:0 WRITE_MASK:1 OMOD:0 EXECUTE_MASK:0 UPDATE_PRED:0 0016 0000A020 CF NATIVE 0017 95C01000 CF NATIVE 0018 0000000D CF ADDR:26 0019 83400001 CF INST:0xd 
COND:0 POP_COUNT:1 0020 00000026 ALU ADDR:76 KCACHE_MODE0:0 KCACHE_BANK0:0 KCACHE_BANK1:0 0021 A0180000 ALU INST:0x8 KCACHE_MODE1:0 KCACHE_ADDR0:0 KCACHE_ADDR1:0 COUNT:7 0076 800000FD SRC0(SEL:253 REL:0 CHAN:0 NEG:0) SRC1(SEL:0 REL:0 CHAN:0 NEG:0 IM:0) LAST:1) 0077 40000C90 * INST:0x19 DST(SEL:0 CHAN:2 REL:0 CLAMP:0) BANK_SWIZZLE:0 SRC0_ABS:0 SRC1_ABS:0 WRITE_MASK:1 OMOD:0 EXECUTE_MASK:0 UPDATE_PRED:0 0078 00000002 0.000000 (2) 0080 811FC400 SRC0(SEL:0 REL:0 CHAN:1 NEG:0) SRC1(SEL:254 REL:0 CHAN:2 NEG:0 IM:0) LAST:1) 0081 20000B90 * INST:0x17 DST(SEL:0 CHAN:1 REL:0 CLAMP:0) BANK_SWIZZLE:0 SRC0_ABS:0 SRC1_ABS:0 WRITE_MASK:1 OMOD:0 EXECUTE_MASK:0 UPDATE_PRED:0 0082 009FC000 SRC0(SEL:0 REL:0 CHAN:0 NEG:0) SRC1(SEL:254 REL:0 CHAN:1 NEG:0 IM:0) LAST:0) 0083 00001A10 INST:0x34 DST(SEL:0 CHAN:0 REL:0 CLAMP:0) BANK_SWIZZLE:0 SRC0_ABS:0 SRC1_ABS:0 WRITE_MASK:1 OMOD:0 EXECUTE_MASK:0 UPDATE_PRED:0 0084 800000FD SRC0(SEL:253 REL:0 CHAN:0 NEG:0) SRC1(SEL:0 REL:0 CHAN:0 NEG:0 IM:0) LAST:1) 0085 20000C90 * INST:0x19 DST(SEL:0 CHAN:1 REL:0 CLAMP:0) BANK_SWIZZLE:0 SRC0_ABS:0 SRC1_ABS:0 WRITE_MASK:1 OMOD:0 EXECUTE_MASK:0 UPDATE_PRED:0 0086 00000002 0.000000 (2) 0088 809FC0FE SRC0(SEL:254 REL:0 CHAN:0 NEG:0) SRC1(SEL:254 REL:0 CHAN:1 NEG:0 IM:0) LAST:1) 0089 00000B10 * INST:0x16 DST(SEL:0 CHAN:0 REL:0 CLAMP:0) BANK_SWIZZLE:0 SRC0_ABS:0 SRC1_ABS:0 WRITE_MASK:1 OMOD:0 EXECUTE_MASK:0 UPDATE_PRED:0 0022 0000A020 CF NATIVE 0023 95C01000 CF NATIVE 0024 0000000D CF ADDR:26 0025 83800001 CF INST:0xe COND:0 POP_COUNT:1 --------------------------------------
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev